/*
 * ====================================================================
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation.  For more
 * information on the Apache Software Foundation, please see
 * <http://www.apache.org/>.
 *
 */
package org.apache.http.client.utils;

import java.net.URI;
import java.net.URISyntaxException;
import java.util.List;
import java.util.Locale;
import java.util.Stack;

import org.apache.http.HttpHost;
import org.apache.http.conn.routing.RouteInfo;
import org.apache.http.util.Args;
import org.apache.http.util.TextUtils;

/**
 * A collection of utilities for {@link URI URIs}, to workaround
 * bugs within the class or for ease-of-use features.
 *
 * @since 4.0
 */
public class URIUtils {

    /**
     * Constructs a {@link URI} using all the parameters. This should be
     * used instead of
     * {@link URI#URI(String, String, String, int, String, String, String)}
     * or any of the other URI multi-argument URI constructors.
     * <p>
     * The scheme is only emitted when a host is given, the port is only
     * emitted when it is greater than zero, and a leading {@code '/'} is
     * added when the path is {@code null} or does not start with one.
     *
     * @param scheme
     *            Scheme name
     * @param host
     *            Host name
     * @param port
     *            Port number
     * @param path
     *            Path
     * @param query
     *            Query
     * @param fragment
     *            Fragment
     * @return the URI parsed from the assembled string
     *
     * @throws URISyntaxException
     *             If both a scheme and a path are given but the path is
     *             relative, if the URI string constructed from the given
     *             components violates RFC 2396, or if the authority
     *             component of the string is present but cannot be parsed
     *             as a server-based authority
     *
     * @deprecated (4.2) use {@link URIBuilder}.
     */
    @Deprecated
    public static URI createURI(
            final String scheme,
            final String host,
            final int port,
            final String path,
            final String query,
            final String fragment) throws URISyntaxException {
        final StringBuilder buffer = new StringBuilder();
        if (host != null) {
            if (scheme != null) {
                buffer.append(scheme);
                buffer.append("://");
            }
            buffer.append(host);
            if (port > 0) {
                buffer.append(':');
                buffer.append(port);
            }
        }
        if (path == null || !path.startsWith("/")) {
            buffer.append('/');
        }
        if (path != null) {
            buffer.append(path);
        }
        if (query != null) {
            buffer.append('?');
            buffer.append(query);
        }
        if (fragment != null) {
            buffer.append('#');
            buffer.append(fragment);
        }
        return new URI(buffer.toString());
    }

    /**
     * A convenience method for creating a new {@link URI} whose scheme, host
     * and port are taken from the target host, but whose path, query and
     * fragment are taken from the existing URI. The fragment is only used if
     * dropFragment is false. The path is set to "/" if not explicitly specified.
     * Opaque URIs are returned as is.
     *
     * @param uri
     *            Contains the path, query and fragment to use.
     * @param target
     *            Contains the scheme, host and port to use. If {@code null},
     *            scheme, host and port are removed from the result.
     * @param dropFragment
     *            True if the fragment should not be copied.
     * @return the rewritten URI
     *
     * @throws URISyntaxException
     *             If the resulting URI is invalid.
     */
    public static URI rewriteURI(
            final URI uri,
            final HttpHost target,
            final boolean dropFragment) throws URISyntaxException {
        Args.notNull(uri, "URI");
        if (uri.isOpaque()) {
            return uri;
        }
        final URIBuilder uribuilder = new URIBuilder(uri);
        if (target != null) {
            uribuilder.setScheme(target.getSchemeName());
            uribuilder.setHost(target.getHostName());
            uribuilder.setPort(target.getPort());
        } else {
            uribuilder.setScheme(null);
            uribuilder.setHost(null);
            uribuilder.setPort(-1);
        }
        if (dropFragment) {
            uribuilder.setFragment(null);
        }
        if (TextUtils.isEmpty(uribuilder.getPath())) {
            uribuilder.setPath("/");
        }
        return uribuilder.build();
    }

    /**
     * A convenience method for
     * {@link URIUtils#rewriteURI(URI, HttpHost, boolean)} that always keeps the
     * fragment.
     *
     * @param uri
     *            Contains the path, query and fragment to use.
     * @param target
     *            Contains the scheme, host and port to use.
     * @return the rewritten URI
     *
     * @throws URISyntaxException
     *             If the resulting URI is invalid.
     */
    public static URI rewriteURI(
            final URI uri,
            final HttpHost target) throws URISyntaxException {
        return rewriteURI(uri, target, false);
    }

    /**
     * A convenience method that creates a new {@link URI} whose scheme, host, port, path,
     * query are taken from the existing URI, dropping any fragment or user-information.
     * The path is set to "/" if not explicitly specified and the host name, if any,
     * is converted to lower case. Opaque URIs are returned as is.
     *
     * @param uri
     *            original URI.
     * @return the rewritten URI
     *
     * @throws URISyntaxException
     *             If the resulting URI is invalid.
     */
    public static URI rewriteURI(final URI uri) throws URISyntaxException {
        Args.notNull(uri, "URI");
        if (uri.isOpaque()) {
            return uri;
        }
        final URIBuilder uribuilder = new URIBuilder(uri);
        if (uribuilder.getUserInfo() != null) {
            uribuilder.setUserInfo(null);
        }
        if (TextUtils.isEmpty(uribuilder.getPath())) {
            uribuilder.setPath("/");
        }
        if (uribuilder.getHost() != null) {
            uribuilder.setHost(uribuilder.getHost().toLowerCase(Locale.ROOT));
        }
        uribuilder.setFragment(null);
        return uribuilder.build();
    }

    /**
     * A convenience method that optionally converts the original {@link java.net.URI} either
     * to a relative or an absolute form as required by the specified route.
     *
     * @param uri
     *            original URI.
     * @param route
     *            the route the request is to be sent over; a request routed through
     *            a proxy without tunnelling gets an absolute URI, any other
     *            request gets a relative one.
     * @return the rewritten URI, or {@code null} if {@code uri} is {@code null}
     *
     * @throws URISyntaxException
     *             If the resulting URI is invalid.
     *
     * @since 4.4
     */
    public static URI rewriteURIForRoute(final URI uri, final RouteInfo route) throws URISyntaxException {
        if (uri == null) {
            return null;
        }
        if (route.getProxyHost() != null && !route.isTunnelled()) {
            // Make sure the request URI is absolute
            if (!uri.isAbsolute()) {
                final HttpHost target = route.getTargetHost();
                return rewriteURI(uri, target, true);
            } else {
                return rewriteURI(uri);
            }
        } else {
            // Make sure the request URI is relative
            if (uri.isAbsolute()) {
                return rewriteURI(uri, null, true);
            } else {
                return rewriteURI(uri);
            }
        }
    }

    /**
     * Resolves a URI reference against a base URI. Work-around for bug in
     * java.net.URI (http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=4708535)
     *
     * @param baseURI the base URI
     * @param reference the URI reference
     * @return the resulting URI
     * @throws IllegalArgumentException if {@code reference} is not a valid URI
     */
    public static URI resolve(final URI baseURI, final String reference) {
        return resolve(baseURI, URI.create(reference));
    }

    /**
     * Resolves a URI reference against a base URI. Work-around for bugs in
     * java.net.URI (e.g. http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=4708535)
     * <p>
     * A reference that consists of a query only (starts with {@code '?'})
     * replaces the query and fragment of the base URI and is returned without
     * further normalization. Any other reference is resolved and then
     * syntax-normalized.
     *
     * @param baseURI the base URI
     * @param reference the URI reference
     * @return the resulting URI
     * @throws IllegalArgumentException if the resolved URI cannot be normalized
     */
    public static URI resolve(final URI baseURI, final URI reference){
        Args.notNull(baseURI, "Base URI");
        Args.notNull(reference, "Reference URI");
        final String s = reference.toASCIIString();
        if (s.startsWith("?")) {
            String baseUri = baseURI.toASCIIString();
            // The fragment of the base never survives resolution (RFC 3986, 5.2.2).
            // Strip it first so that a '?' inside the fragment is not mistaken for
            // the query delimiter and the new query does not end up in the fragment.
            final int hash = baseUri.indexOf('#');
            if (hash > -1) {
                baseUri = baseUri.substring(0, hash);
            }
            final int i = baseUri.indexOf('?');
            if (i > -1) {
                baseUri = baseUri.substring(0, i);
            }
            return URI.create(baseUri + s);
        }
        final boolean emptyReference = s.isEmpty();
        URI resolved;
        if (emptyReference) {
            // Resolve an empty fragment instead of the empty reference and cut
            // the trailing '#' off again afterwards
            resolved = baseURI.resolve(URI.create("#"));
            final String resolvedString = resolved.toASCIIString();
            resolved = URI.create(resolvedString.substring(0, resolvedString.indexOf('#')));
        } else {
            resolved = baseURI.resolve(reference);
        }
        try {
            return normalizeSyntax(resolved);
        } catch (final URISyntaxException ex) {
            throw new IllegalArgumentException(ex);
        }
    }

    /**
     * Removes dot segments according to RFC 3986, section 5.2.4 and
     * Syntax-Based Normalization according to RFC 3986, section 6.2.2.
     * Opaque URIs and URIs without an authority are returned as is.
     *
     * @param uri the original URI
     * @return the URI without dot segments and with lower-case scheme and host
     * @throws URISyntaxException if the normalized URI is invalid
     */
    static URI normalizeSyntax(final URI uri) throws URISyntaxException {
        if (uri.isOpaque() || uri.getAuthority() == null) {
            // opaque and file: URIs
            return uri;
        }
        Args.check(uri.isAbsolute(), "Base URI must be absolute");
        final URIBuilder builder = new URIBuilder(uri);
        final String path = builder.getPath();
        if (path != null && !path.equals("/")) {
            final String[] inputSegments = path.split("/");
            final Stack<String> outputSegments = new Stack<String>();
            for (final String inputSegment : inputSegments) {
                if ((inputSegment.isEmpty()) || (".".equals(inputSegment))) {
                    // Do nothing
                } else if ("..".equals(inputSegment)) {
                    // ".." above the root is silently dropped
                    if (!outputSegments.isEmpty()) {
                        outputSegments.pop();
                    }
                } else {
                    outputSegments.push(inputSegment);
                }
            }
            final StringBuilder outputBuffer = new StringBuilder();
            for (final String outputSegment : outputSegments) {
                outputBuffer.append('/').append(outputSegment);
            }
            if (path.lastIndexOf('/') == path.length() - 1) {
                // path.endsWith("/") || path.equals("")
                outputBuffer.append('/');
            }
            builder.setPath(outputBuffer.toString());
        }
        if (builder.getScheme() != null) {
            builder.setScheme(builder.getScheme().toLowerCase(Locale.ROOT));
        }
        if (builder.getHost() != null) {
            builder.setHost(builder.getHost().toLowerCase(Locale.ROOT));
        }
        return builder.build();
    }

    /**
     * Extracts target host from the given {@link URI}.
     * <p>
     * If {@link URI#getHost()} cannot provide the host (for instance because
     * the host name contains characters {@code java.net.URI} does not accept),
     * the authority component is parsed manually. An authority whose port
     * suffix is not a plain decimal number, such as
     * {@code user@apache.org:80@google.com}, is considered malformed and
     * yields {@code null} rather than a guessed host.
     *
     * @param uri
     *            the URI to extract the host from, may be {@code null}.
     * @return the target host if the URI is absolute or {@code null} if the URI is
     * relative or does not contain a valid host name.
     *
     * @since 4.1
     */
    public static HttpHost extractHost(final URI uri) {
        if (uri == null || !uri.isAbsolute()) {
            return null;
        }
        int port = uri.getPort(); // may be overridden later
        String host = uri.getHost();
        if (host == null) { // normal parse failed; let's do it ourselves
            // authority does not seem to care about the valid character-set for host names
            String authority = uri.getAuthority();
            if (authority == null) {
                return null;
            }
            // Strip off any leading user credentials; a lone trailing '@'
            // leaves an empty host which is rejected below
            final int at = authority.indexOf('@');
            if (at >= 0) {
                authority = authority.substring(at + 1);
            }
            // Extract the port suffix, if present
            final int colon = authority.indexOf(':');
            if (colon >= 0) {
                final String portText = authority.substring(colon + 1);
                if (!portText.isEmpty()) {
                    port = parsePort(portText);
                    if (port < 0) {
                        // Trailing garbage after the port: refuse to guess the host
                        return null;
                    }
                }
                host = authority.substring(0, colon);
            } else {
                host = authority;
            }
        }
        if (TextUtils.isBlank(host)) {
            return null;
        }
        try {
            return new HttpHost(host, port, uri.getScheme());
        } catch (final IllegalArgumentException ignore) {
            // not a host name HttpHost accepts; reported as "no valid host"
            return null;
        }
    }

    /**
     * Parses a port suffix that must consist of decimal digits only.
     *
     * @param portText the non-empty text following the ':' of an authority
     * @return the port number, or {@code -1} if the text is not a valid
     *         non-negative integer
     */
    private static int parsePort(final String portText) {
        for (int i = 0; i < portText.length(); i++) {
            if (!Character.isDigit(portText.charAt(i))) {
                return -1;
            }
        }
        try {
            return Integer.parseInt(portText);
        } catch (final NumberFormatException ignore) {
            // digits only, but too large for an int
            return -1;
        }
    }

    /**
     * Derives the interpreted (absolute) URI that was used to generate the last
     * request. This is done by extracting the request-uri and target origin for
     * the last request and scanning all the redirect locations for the last
     * fragment identifier, then combining the result into a {@link URI}.
     *
     * @param originalURI
     *            original request before any redirects
     * @param target
     *            if the last URI is relative, it is resolved against this target,
     *            or {@code null} if not available.
     * @param redirects
     *            collection of redirect locations since the original request
     *            or {@code null} if not available.
     * @return interpreted (absolute) URI
     * @throws URISyntaxException
     *             If the resulting URI is invalid.
     */
    public static URI resolve(
            final URI originalURI,
            final HttpHost target,
            final List<URI> redirects) throws URISyntaxException {
        Args.notNull(originalURI, "Request URI");
        final URIBuilder uribuilder;
        if (redirects == null || redirects.isEmpty()) {
            uribuilder = new URIBuilder(originalURI);
        } else {
            uribuilder = new URIBuilder(redirects.get(redirects.size() - 1));
            String frag = uribuilder.getFragment();
            // read interpreted fragment identifier from redirect locations
            for (int i = redirects.size() - 1; frag == null && i >= 0; i--) {
                frag = redirects.get(i).getFragment();
            }
            uribuilder.setFragment(frag);
        }
        // read interpreted fragment identifier from original request
        if (uribuilder.getFragment() == null) {
            uribuilder.setFragment(originalURI.getFragment());
        }
        // last target origin
        if (target != null && !uribuilder.isAbsolute()) {
            uribuilder.setScheme(target.getSchemeName());
            uribuilder.setHost(target.getHostName());
            uribuilder.setPort(target.getPort());
        }
        return uribuilder.build();
    }

    /**
     * This class should not be instantiated.
     */
    private URIUtils() {
    }

}