001/*
002 * ====================================================================
003 * Licensed to the Apache Software Foundation (ASF) under one
004 * or more contributor license agreements.  See the NOTICE file
005 * distributed with this work for additional information
006 * regarding copyright ownership.  The ASF licenses this file
007 * to you under the Apache License, Version 2.0 (the
008 * "License"); you may not use this file except in compliance
009 * with the License.  You may obtain a copy of the License at
010 *
011 *   http://www.apache.org/licenses/LICENSE-2.0
012 *
013 * Unless required by applicable law or agreed to in writing,
014 * software distributed under the License is distributed on an
015 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
016 * KIND, either express or implied.  See the License for the
017 * specific language governing permissions and limitations
018 * under the License.
019 * ====================================================================
020 *
021 * This software consists of voluntary contributions made by many
022 * individuals on behalf of the Apache Software Foundation.  For more
023 * information on the Apache Software Foundation, please see
024 * <http://www.apache.org/>.
025 *
026 */
027package org.apache.http.client.utils;
028
029import java.net.URI;
030import java.net.URISyntaxException;
031import java.util.List;
032import java.util.Locale;
033import java.util.Stack;
034
035import org.apache.http.HttpHost;
036import org.apache.http.conn.routing.RouteInfo;
037import org.apache.http.util.Args;
038import org.apache.http.util.TextUtils;
039
040/**
 * A collection of utilities for {@link URI URIs}, to work around
 * bugs within the class or for ease-of-use features.
043 *
044 * @since 4.0
045 */
046public class URIUtils {
047
048     /**
049         * Constructs a {@link URI} using all the parameters. This should be
050         * used instead of
051         * {@link URI#URI(String, String, String, int, String, String, String)}
052         * or any of the other URI multi-argument URI constructors.
053         *
054         * @param scheme
055         *            Scheme name
056         * @param host
057         *            Host name
058         * @param port
059         *            Port number
060         * @param path
061         *            Path
062         * @param query
063         *            Query
064         * @param fragment
065         *            Fragment
066         *
067         * @throws URISyntaxException
068         *             If both a scheme and a path are given but the path is
069         *             relative, if the URI string constructed from the given
070         *             components violates RFC&nbsp;2396, or if the authority
071         *             component of the string is present but cannot be parsed
072         *             as a server-based authority
073         *
074         * @deprecated (4.2) use {@link URIBuilder}.
075         */
076    @Deprecated
077    public static URI createURI(
078            final String scheme,
079            final String host,
080            final int port,
081            final String path,
082            final String query,
083            final String fragment) throws URISyntaxException {
084        final StringBuilder buffer = new StringBuilder();
085        if (host != null) {
086            if (scheme != null) {
087                buffer.append(scheme);
088                buffer.append("://");
089            }
090            buffer.append(host);
091            if (port > 0) {
092                buffer.append(':');
093                buffer.append(port);
094            }
095        }
096        if (path == null || !path.startsWith("/")) {
097            buffer.append('/');
098        }
099        if (path != null) {
100            buffer.append(path);
101        }
102        if (query != null) {
103            buffer.append('?');
104            buffer.append(query);
105        }
106        if (fragment != null) {
107            buffer.append('#');
108            buffer.append(fragment);
109        }
110        return new URI(buffer.toString());
111    }
112
113    /**
114     * A convenience method for creating a new {@link URI} whose scheme, host
115     * and port are taken from the target host, but whose path, query and
116     * fragment are taken from the existing URI. The fragment is only used if
117     * dropFragment is false. The path is set to "/" if not explicitly specified.
118     *
119     * @param uri
120     *            Contains the path, query and fragment to use.
121     * @param target
122     *            Contains the scheme, host and port to use.
123     * @param dropFragment
124     *            True if the fragment should not be copied.
125     *
126     * @throws URISyntaxException
127     *             If the resulting URI is invalid.
128     */
129    public static URI rewriteURI(
130            final URI uri,
131            final HttpHost target,
132            final boolean dropFragment) throws URISyntaxException {
133        Args.notNull(uri, "URI");
134        if (uri.isOpaque()) {
135            return uri;
136        }
137        final URIBuilder uribuilder = new URIBuilder(uri);
138        if (target != null) {
139            uribuilder.setScheme(target.getSchemeName());
140            uribuilder.setHost(target.getHostName());
141            uribuilder.setPort(target.getPort());
142        } else {
143            uribuilder.setScheme(null);
144            uribuilder.setHost(null);
145            uribuilder.setPort(-1);
146        }
147        if (dropFragment) {
148            uribuilder.setFragment(null);
149        }
150        if (TextUtils.isEmpty(uribuilder.getPath())) {
151            uribuilder.setPath("/");
152        }
153        return uribuilder.build();
154    }
155
156    /**
157     * A convenience method for
158     * {@link URIUtils#rewriteURI(URI, HttpHost, boolean)} that always keeps the
159     * fragment.
160     */
161    public static URI rewriteURI(
162            final URI uri,
163            final HttpHost target) throws URISyntaxException {
164        return rewriteURI(uri, target, false);
165    }
166
167    /**
168     * A convenience method that creates a new {@link URI} whose scheme, host, port, path,
169     * query are taken from the existing URI, dropping any fragment or user-information.
170     * The path is set to "/" if not explicitly specified. The existing URI is returned
171     * unmodified if it has no fragment or user-information and has a path.
172     *
173     * @param uri
174     *            original URI.
175     * @throws URISyntaxException
176     *             If the resulting URI is invalid.
177     */
178    public static URI rewriteURI(final URI uri) throws URISyntaxException {
179        Args.notNull(uri, "URI");
180        if (uri.isOpaque()) {
181            return uri;
182        }
183        final URIBuilder uribuilder = new URIBuilder(uri);
184        if (uribuilder.getUserInfo() != null) {
185            uribuilder.setUserInfo(null);
186        }
187        if (TextUtils.isEmpty(uribuilder.getPath())) {
188            uribuilder.setPath("/");
189        }
190        if (uribuilder.getHost() != null) {
191            uribuilder.setHost(uribuilder.getHost().toLowerCase(Locale.ROOT));
192        }
193        uribuilder.setFragment(null);
194        return uribuilder.build();
195    }
196
197    /**
198     * A convenience method that optionally converts the original {@link java.net.URI} either
199     * to a relative or an absolute form as required by the specified route.
200     *
201     * @param uri
202     *            original URI.
203     * @throws URISyntaxException
204     *             If the resulting URI is invalid.
205     *
206     * @since 4.4
207     */
208    public static URI rewriteURIForRoute(final URI uri, final RouteInfo route) throws URISyntaxException {
209        if (uri == null) {
210            return null;
211        }
212        if (route.getProxyHost() != null && !route.isTunnelled()) {
213            // Make sure the request URI is absolute
214            if (!uri.isAbsolute()) {
215                final HttpHost target = route.getTargetHost();
216                return rewriteURI(uri, target, true);
217            } else {
218                return rewriteURI(uri);
219            }
220        } else {
221            // Make sure the request URI is relative
222            if (uri.isAbsolute()) {
223                return rewriteURI(uri, null, true);
224            } else {
225                return rewriteURI(uri);
226            }
227        }
228    }
229
230    /**
231     * Resolves a URI reference against a base URI. Work-around for bug in
232     * java.net.URI (http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=4708535)
233     *
234     * @param baseURI the base URI
235     * @param reference the URI reference
236     * @return the resulting URI
237     */
238    public static URI resolve(final URI baseURI, final String reference) {
239        return resolve(baseURI, URI.create(reference));
240    }
241
242    /**
243     * Resolves a URI reference against a base URI. Work-around for bugs in
244     * java.net.URI (e.g. http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=4708535)
245     *
246     * @param baseURI the base URI
247     * @param reference the URI reference
248     * @return the resulting URI
249     */
250    public static URI resolve(final URI baseURI, final URI reference){
251        Args.notNull(baseURI, "Base URI");
252        Args.notNull(reference, "Reference URI");
253        final String s = reference.toASCIIString();
254        if (s.startsWith("?")) {
255            String baseUri = baseURI.toASCIIString();
256            final int i = baseUri.indexOf('?');
257            baseUri = i > -1 ? baseUri.substring(0, i) : baseUri;
258            return URI.create(baseUri + s);
259        }
260        final boolean emptyReference = s.isEmpty();
261        URI resolved;
262        if (emptyReference) {
263            resolved = baseURI.resolve(URI.create("#"));
264            final String resolvedString = resolved.toASCIIString();
265            resolved = URI.create(resolvedString.substring(0, resolvedString.indexOf('#')));
266        } else {
267            resolved = baseURI.resolve(reference);
268        }
269        try {
270            return normalizeSyntax(resolved);
271        } catch (final URISyntaxException ex) {
272            throw new IllegalArgumentException(ex);
273        }
274    }
275
276    /**
277     * Removes dot segments according to RFC 3986, section 5.2.4 and
278     * Syntax-Based Normalization according to RFC 3986, section 6.2.2.
279     *
280     * @param uri the original URI
281     * @return the URI without dot segments
282     */
283    static URI normalizeSyntax(final URI uri) throws URISyntaxException {
284        if (uri.isOpaque() || uri.getAuthority() == null) {
285            // opaque and file: URIs
286            return uri;
287        }
288        Args.check(uri.isAbsolute(), "Base URI must be absolute");
289        final URIBuilder builder = new URIBuilder(uri);
290        final String path = builder.getPath();
291        if (path != null && !path.equals("/")) {
292            final String[] inputSegments = path.split("/");
293            final Stack<String> outputSegments = new Stack<String>();
294            for (final String inputSegment : inputSegments) {
295                if ((inputSegment.isEmpty()) || (".".equals(inputSegment))) {
296                    // Do nothing
297                } else if ("..".equals(inputSegment)) {
298                    if (!outputSegments.isEmpty()) {
299                        outputSegments.pop();
300                    }
301                } else {
302                    outputSegments.push(inputSegment);
303                }
304            }
305            final StringBuilder outputBuffer = new StringBuilder();
306            for (final String outputSegment : outputSegments) {
307                outputBuffer.append('/').append(outputSegment);
308            }
309            if (path.lastIndexOf('/') == path.length() - 1) {
310                // path.endsWith("/") || path.equals("")
311                outputBuffer.append('/');
312            }
313            builder.setPath(outputBuffer.toString());
314        }
315        if (builder.getScheme() != null) {
316            builder.setScheme(builder.getScheme().toLowerCase(Locale.ROOT));
317        }
318        if (builder.getHost() != null) {
319            builder.setHost(builder.getHost().toLowerCase(Locale.ROOT));
320        }
321        return builder.build();
322    }
323
324    /**
325     * Extracts target host from the given {@link URI}.
326     *
327     * @param uri
328     * @return the target host if the URI is absolute or {@code null} if the URI is
329     * relative or does not contain a valid host name.
330     *
331     * @since 4.1
332     */
333    public static HttpHost extractHost(final URI uri) {
334        if (uri == null) {
335            return null;
336        }
337        HttpHost target = null;
338        if (uri.isAbsolute()) {
339            int port = uri.getPort(); // may be overridden later
340            String host = uri.getHost();
341            if (host == null) { // normal parse failed; let's do it ourselves
342                // authority does not seem to care about the valid character-set for host names
343                host = uri.getAuthority();
344                if (host != null) {
345                    // Strip off any leading user credentials
346                    final int at = host.indexOf('@');
347                    if (at >= 0) {
348                        if (host.length() > at+1 ) {
349                            host = host.substring(at+1);
350                        } else {
351                            host = null; // @ on its own
352                        }
353                    }
354                    // Extract the port suffix, if present
355                    if (host != null) {
356                        final int colon = host.indexOf(':');
357                        if (colon >= 0) {
358                            final int pos = colon + 1;
359                            int len = 0;
360                            for (int i = pos; i < host.length(); i++) {
361                                if (Character.isDigit(host.charAt(i))) {
362                                    len++;
363                                } else {
364                                    break;
365                                }
366                            }
367                            if (len > 0) {
368                                try {
369                                    port = Integer.parseInt(host.substring(pos, pos + len));
370                                } catch (final NumberFormatException ex) {
371                                }
372                            }
373                            host = host.substring(0, colon);
374                        }
375                    }
376                }
377            }
378            final String scheme = uri.getScheme();
379            if (!TextUtils.isBlank(host)) {
380                try {
381                    target = new HttpHost(host, port, scheme);
382                } catch (final IllegalArgumentException ignore) {
383                }
384            }
385        }
386        return target;
387    }
388
389    /**
390     * Derives the interpreted (absolute) URI that was used to generate the last
391     * request. This is done by extracting the request-uri and target origin for
392     * the last request and scanning all the redirect locations for the last
393     * fragment identifier, then combining the result into a {@link URI}.
394     *
395     * @param originalURI
396     *            original request before any redirects
397     * @param target
398     *            if the last URI is relative, it is resolved against this target,
399     *            or {@code null} if not available.
400     * @param redirects
401     *            collection of redirect locations since the original request
402     *            or {@code null} if not available.
403     * @return interpreted (absolute) URI
404     */
405    public static URI resolve(
406            final URI originalURI,
407            final HttpHost target,
408            final List<URI> redirects) throws URISyntaxException {
409        Args.notNull(originalURI, "Request URI");
410        final URIBuilder uribuilder;
411        if (redirects == null || redirects.isEmpty()) {
412            uribuilder = new URIBuilder(originalURI);
413        } else {
414            uribuilder = new URIBuilder(redirects.get(redirects.size() - 1));
415            String frag = uribuilder.getFragment();
416            // read interpreted fragment identifier from redirect locations
417            for (int i = redirects.size() - 1; frag == null && i >= 0; i--) {
418                frag = redirects.get(i).getFragment();
419            }
420            uribuilder.setFragment(frag);
421        }
422        // read interpreted fragment identifier from original request
423        if (uribuilder.getFragment() == null) {
424            uribuilder.setFragment(originalURI.getFragment());
425        }
426        // last target origin
427        if (target != null && !uribuilder.isAbsolute()) {
428            uribuilder.setScheme(target.getSchemeName());
429            uribuilder.setHost(target.getHostName());
430            uribuilder.setPort(target.getPort());
431        }
432        return uribuilder.build();
433    }
434
435    /**
436     * This class should not be instantiated.
437     */
438    private URIUtils() {
439    }
440
441}