/*
 * ====================================================================
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation. For more
 * information on the Apache Software Foundation, please see
 * <http://www.apache.org/>.
 *
 */

package org.apache.http.message;

import java.util.BitSet;

import org.apache.http.annotation.ThreadingBehavior;
import org.apache.http.annotation.Contract;
import org.apache.http.util.CharArrayBuffer;

/**
 * Low level parser for header field elements. The parsing routines of this class are designed
 * to produce near zero intermediate garbage and make no intermediate copies of input data.
 * <p>
 * This class is immutable and thread safe.
 *
 * @since 4.4
 */
@Contract(threading = ThreadingBehavior.IMMUTABLE)
public class TokenParser {

    /**
     * Builds a {@link BitSet} in which exactly the bits at the given indexes are set.
     * Typically used to declare a set of delimiter characters.
     *
     * @param b indexes (character codes) of the bits to set
     * @return a new bit set with the requested bits set
     */
    public static BitSet INIT_BITSET(final int ... b) {
        final BitSet result = new BitSet();
        for (int i = 0; i < b.length; i++) {
            result.set(b[i]);
        }
        return result;
    }

    /** US-ASCII CR, carriage return (13) */
    public static final char CR = '\r';

    /** US-ASCII LF, line feed (10) */
    public static final char LF = '\n';

    /** US-ASCII SP, space (32) */
    public static final char SP = ' ';

    /** US-ASCII HT, horizontal-tab (9) */
    public static final char HT = '\t';

    /** Double quote */
    public static final char DQUOTE = '\"';

    /** Backward slash / escape character */
    public static final char ESCAPE = '\\';

    /**
     * Tests whether the given character is one of SP, HT, CR or LF.
     *
     * @param ch character to test
     * @return {@code true} if the character is SP, HT, CR or LF, {@code false} otherwise
     */
    public static boolean isWhitespace(final char ch) {
        switch (ch) {
            case SP:
            case HT:
            case CR:
            case LF:
                return true;
            default:
                return false;
        }
    }

    public static final TokenParser INSTANCE = new TokenParser();

    /**
     * Reads a token from the buffer, stopping at the first delimiter or at the cursor's
     * upper bound. Leading and trailing whitespace is dropped and every run of whitespace
     * between two pieces of content is replaced by a single space. The cursor is left on
     * the delimiter that ended the token, or at the end of its range.
     *
     * @param buf buffer with the sequence of chars to be parsed
     * @param cursor defines the bounds and current position of the buffer
     * @param delimiters set of delimiting characters. Can be {@code null} if the token
     *  is not delimited by any character.
     * @return the extracted token, possibly empty
     */
    public String parseToken(final CharArrayBuffer buf, final ParserCursor cursor, final BitSet delimiters) {
        final StringBuilder token = new StringBuilder();
        // Set once whitespace has been skipped; turned into one space only if content follows.
        boolean pendingSpace = false;
        while (!cursor.atEnd()) {
            final char ch = buf.charAt(cursor.getPos());
            if (delimiters != null && delimiters.get(ch)) {
                break;
            }
            if (isWhitespace(ch)) {
                skipWhiteSpace(buf, cursor);
                pendingSpace = true;
                continue;
            }
            if (pendingSpace && token.length() > 0) {
                token.append(' ');
            }
            copyContent(buf, cursor, delimiters, token);
            pendingSpace = false;
        }
        return token.toString();
    }

    /**
     * Reads a value from the buffer, stopping at the first delimiter found outside a quoted
     * section or at the cursor's upper bound. Quoted sections are copied without their
     * enclosing quote marks; see
     * {@link #copyQuotedContent(CharArrayBuffer, ParserCursor, StringBuilder)} for escape
     * handling. Leading and trailing whitespace outside quotes is dropped and every run of
     * such whitespace between two pieces of content is replaced by a single space.
     *
     * @param buf buffer with the sequence of chars to be parsed
     * @param cursor defines the bounds and current position of the buffer
     * @param delimiters set of delimiting characters. Can be {@code null} if the value
     *  is not delimited by any character.
     * @return the extracted value, possibly empty
     */
    public String parseValue(final CharArrayBuffer buf, final ParserCursor cursor, final BitSet delimiters) {
        final StringBuilder value = new StringBuilder();
        // Set once whitespace has been skipped; turned into one space only if content follows.
        boolean pendingSpace = false;
        while (!cursor.atEnd()) {
            final char ch = buf.charAt(cursor.getPos());
            if (delimiters != null && delimiters.get(ch)) {
                break;
            }
            if (isWhitespace(ch)) {
                skipWhiteSpace(buf, cursor);
                pendingSpace = true;
                continue;
            }
            if (pendingSpace && value.length() > 0) {
                value.append(' ');
            }
            if (ch == DQUOTE) {
                copyQuotedContent(buf, cursor, value);
            } else {
                copyUnquotedContent(buf, cursor, delimiters, value);
            }
            pendingSpace = false;
        }
        return value.toString();
    }

    /**
     * Advances the cursor past any whitespace characters at its current position, leaving it
     * on the first non-whitespace character or at its upper bound.
     *
     * @param buf buffer with the sequence of chars to be parsed
     * @param cursor defines the bounds and current position of the buffer
     */
    public void skipWhiteSpace(final CharArrayBuffer buf, final ParserCursor cursor) {
        int pos = cursor.getPos();
        final int upperBound = cursor.getUpperBound();
        while (pos < upperBound && isWhitespace(buf.charAt(pos))) {
            pos++;
        }
        cursor.updatePos(pos);
    }

    /**
     * Appends characters to the destination buffer, starting at the cursor position, until
     * a whitespace character, one of the given delimiters or the cursor's upper bound is
     * reached. The cursor is moved to the character that stopped the copy.
     *
     * @param buf buffer with the sequence of chars to be parsed
     * @param cursor defines the bounds and current position of the buffer
     * @param delimiters set of delimiting characters. Can be {@code null} if the value
     *  is delimited by a whitespace only.
     * @param dst destination buffer
     */
    public void copyContent(final CharArrayBuffer buf, final ParserCursor cursor, final BitSet delimiters,
            final StringBuilder dst) {
        int pos = cursor.getPos();
        final int upperBound = cursor.getUpperBound();
        while (pos < upperBound) {
            final char ch = buf.charAt(pos);
            final boolean delimiter = delimiters != null && delimiters.get(ch);
            if (delimiter || isWhitespace(ch)) {
                break;
            }
            dst.append(ch);
            pos++;
        }
        cursor.updatePos(pos);
    }

    /**
     * Appends characters to the destination buffer, starting at the cursor position, until
     * a whitespace character, a double quote, one of the given delimiters or the cursor's
     * upper bound is reached. The cursor is moved to the character that stopped the copy.
     *
     * @param buf buffer with the sequence of chars to be parsed
     * @param cursor defines the bounds and current position of the buffer
     * @param delimiters set of delimiting characters. Can be {@code null} if the value
     *  is delimited by a whitespace or a quote only.
     * @param dst destination buffer
     */
    public void copyUnquotedContent(final CharArrayBuffer buf, final ParserCursor cursor,
            final BitSet delimiters, final StringBuilder dst) {
        int pos = cursor.getPos();
        final int upperBound = cursor.getUpperBound();
        while (pos < upperBound) {
            final char ch = buf.charAt(pos);
            final boolean delimiter = delimiters != null && delimiters.get(ch);
            if (delimiter || isWhitespace(ch) || ch == DQUOTE) {
                break;
            }
            dst.append(ch);
            pos++;
        }
        cursor.updatePos(pos);
    }

    /**
     * Appends the content of a quoted section to the destination buffer, without the
     * enclosing quote marks.
     * <p>
     * Does nothing if the cursor is at its end or the character at the cursor is not a
     * double quote. Otherwise the opening quote is skipped and characters are copied until
     * an unescaped closing quote, which is consumed, or the cursor's upper bound. A
     * backslash followed by a double quote or another backslash yields just that second
     * character; a backslash followed by any other character yields both characters.
     * Unescaped CR and LF characters are dropped. A backslash that is the last character
     * before the upper bound is discarded.
     *
     * @param buf buffer with the sequence of chars to be parsed
     * @param cursor defines the bounds and current position of the buffer
     * @param dst destination buffer
     */
    public void copyQuotedContent(final CharArrayBuffer buf, final ParserCursor cursor,
            final StringBuilder dst) {
        if (cursor.atEnd()) {
            return;
        }
        int pos = cursor.getPos();
        final int upperBound = cursor.getUpperBound();
        if (buf.charAt(pos) != DQUOTE) {
            return;
        }
        // Step over the opening quote.
        pos++;
        boolean escaped = false;
        while (pos < upperBound) {
            // pos always points one past the character being examined.
            final char ch = buf.charAt(pos++);
            if (escaped) {
                escaped = false;
                // Only quote and backslash are truly escapable; otherwise keep the backslash.
                if (ch != DQUOTE && ch != ESCAPE) {
                    dst.append(ESCAPE);
                }
                dst.append(ch);
            } else if (ch == DQUOTE) {
                // Closing quote: already consumed by the increment above.
                break;
            } else if (ch == ESCAPE) {
                escaped = true;
            } else if (ch != CR && ch != LF) {
                dst.append(ch);
            }
        }
        cursor.updatePos(pos);
    }

}