001package org.jsoup.helper; 002 003import org.jsoup.Connection; 004import org.jsoup.HttpStatusException; 005import org.jsoup.UncheckedIOException; 006import org.jsoup.UnsupportedMimeTypeException; 007import org.jsoup.internal.ConstrainableInputStream; 008import org.jsoup.nodes.Document; 009import org.jsoup.parser.Parser; 010import org.jsoup.parser.TokenQueue; 011 012import javax.net.ssl.HostnameVerifier; 013import javax.net.ssl.HttpsURLConnection; 014import javax.net.ssl.SSLContext; 015import javax.net.ssl.SSLSession; 016import javax.net.ssl.SSLSocketFactory; 017import javax.net.ssl.TrustManager; 018import javax.net.ssl.X509TrustManager; 019import java.io.BufferedInputStream; 020import java.io.BufferedWriter; 021import java.io.ByteArrayInputStream; 022import java.io.IOException; 023import java.io.InputStream; 024import java.io.OutputStream; 025import java.io.OutputStreamWriter; 026import java.io.UnsupportedEncodingException; 027import java.net.HttpURLConnection; 028import java.net.InetSocketAddress; 029import java.net.MalformedURLException; 030import java.net.Proxy; 031import java.net.URI; 032import java.net.URL; 033import java.net.URLEncoder; 034import java.nio.Buffer; 035import java.nio.ByteBuffer; 036import java.nio.charset.Charset; 037import java.nio.charset.IllegalCharsetNameException; 038import java.security.KeyManagementException; 039import java.security.NoSuchAlgorithmException; 040import java.security.cert.X509Certificate; 041import java.util.ArrayList; 042import java.util.Collection; 043import java.util.Collections; 044import java.util.LinkedHashMap; 045import java.util.List; 046import java.util.Map; 047import java.util.regex.Pattern; 048import java.util.zip.GZIPInputStream; 049 050import static org.jsoup.Connection.Method.HEAD; 051import static org.jsoup.internal.Normalizer.lowerCase; 052 053/** 054 * Implementation of {@link Connection}. 055 * @see org.jsoup.Jsoup#connect(String) 056 */ 057public class HttpConnection implements Connection { 058 public static final String CONTENT_ENCODING = "Content-Encoding"; 059 /** 060 * Many users would get caught by not setting a user-agent and therefore getting different responses on their desktop 061 * vs in jsoup, which would otherwise default to {@code Java}. So by default, use a desktop UA. 062 */ 063 public static final String DEFAULT_UA = 064 "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36"; 065 private static final String USER_AGENT = "User-Agent"; 066 private static final String CONTENT_TYPE = "Content-Type"; 067 private static final String MULTIPART_FORM_DATA = "multipart/form-data"; 068 private static final String FORM_URL_ENCODED = "application/x-www-form-urlencoded"; 069 private static final int HTTP_TEMP_REDIR = 307; // http/1.1 temporary redirect, not in Java's set. 070 private static final String DefaultUploadType = "application/octet-stream"; 071 072 public static Connection connect(String url) { 073 Connection con = new HttpConnection(); 074 con.url(url); 075 return con; 076 } 077 078 public static Connection connect(URL url) { 079 Connection con = new HttpConnection(); 080 con.url(url); 081 return con; 082 } 083 084 /** 085 * Encodes the input URL into a safe ASCII URL string 086 * @param url unescaped URL 087 * @return escaped URL 088 */ 089 private static String encodeUrl(String url) { 090 try { 091 URL u = new URL(url); 092 return encodeUrl(u).toExternalForm(); 093 } catch (Exception e) { 094 return url; 095 } 096 } 097 098 static URL encodeUrl(URL u) { 099 try { 100 // odd way to encode urls, but it works! 101 String urlS = u.toExternalForm(); // URL external form may have spaces which is illegal in new URL() (odd asymmetry) 102 urlS = urlS.replaceAll(" ", "%20"); 103 final URI uri = new URI(urlS); 104 return new URL(uri.toASCIIString()); 105 } catch (Exception e) { 106 return u; 107 } 108 } 109 110 private static String encodeMimeName(String val) { 111 if (val == null) 112 return null; 113 return val.replaceAll("\"", "%22"); 114 } 115 116 private Connection.Request req; 117 private Connection.Response res; 118 119 private HttpConnection() { 120 req = new Request(); 121 res = new Response(); 122 } 123 124 public Connection url(URL url) { 125 req.url(url); 126 return this; 127 } 128 129 public Connection url(String url) { 130 Validate.notEmpty(url, "Must supply a valid URL"); 131 try { 132 req.url(new URL(encodeUrl(url))); 133 } catch (MalformedURLException e) { 134 throw new IllegalArgumentException("Malformed URL: " + url, e); 135 } 136 return this; 137 } 138 139 public Connection proxy(Proxy proxy) { 140 req.proxy(proxy); 141 return this; 142 } 143 144 public Connection proxy(String host, int port) { 145 req.proxy(host, port); 146 return this; 147 } 148 149 public Connection userAgent(String userAgent) { 150 Validate.notNull(userAgent, "User agent must not be null"); 151 req.header(USER_AGENT, userAgent); 152 return this; 153 } 154 155 public Connection timeout(int millis) { 156 req.timeout(millis); 157 return this; 158 } 159 160 public Connection maxBodySize(int bytes) { 161 req.maxBodySize(bytes); 162 return this; 163 } 164 165 public Connection followRedirects(boolean followRedirects) { 166 req.followRedirects(followRedirects); 167 return this; 168 } 169 170 public Connection referrer(String referrer) { 171 Validate.notNull(referrer, "Referrer must not be null"); 172 req.header("Referer", referrer); 173 return this; 174 } 175 176 public Connection method(Method method) { 177 req.method(method); 178 return this; 179 } 180 181 public Connection ignoreHttpErrors(boolean ignoreHttpErrors) { 182 req.ignoreHttpErrors(ignoreHttpErrors); 183 return this; 184 } 185 186 public Connection ignoreContentType(boolean ignoreContentType) { 187 req.ignoreContentType(ignoreContentType); 188 return this; 189 } 190 191 public Connection validateTLSCertificates(boolean value) { 192 req.validateTLSCertificates(value); 193 return this; 194 } 195 196 public Connection data(String key, String value) { 197 req.data(KeyVal.create(key, value)); 198 return this; 199 } 200 201 public Connection data(String key, String filename, InputStream inputStream) { 202 req.data(KeyVal.create(key, filename, inputStream)); 203 return this; 204 } 205 206 @Override 207 public Connection data(String key, String filename, InputStream inputStream, String contentType) { 208 req.data(KeyVal.create(key, filename, inputStream).contentType(contentType)); 209 return this; 210 } 211 212 public Connection data(Map<String, String> data) { 213 Validate.notNull(data, "Data map must not be null"); 214 for (Map.Entry<String, String> entry : data.entrySet()) { 215 req.data(KeyVal.create(entry.getKey(), entry.getValue())); 216 } 217 return this; 218 } 219 220 public Connection data(String... keyvals) { 221 Validate.notNull(keyvals, "Data key value pairs must not be null"); 222 Validate.isTrue(keyvals.length %2 == 0, "Must supply an even number of key value pairs"); 223 for (int i = 0; i < keyvals.length; i += 2) { 224 String key = keyvals[i]; 225 String value = keyvals[i+1]; 226 Validate.notEmpty(key, "Data key must not be empty"); 227 Validate.notNull(value, "Data value must not be null"); 228 req.data(KeyVal.create(key, value)); 229 } 230 return this; 231 } 232 233 public Connection data(Collection<Connection.KeyVal> data) { 234 Validate.notNull(data, "Data collection must not be null"); 235 for (Connection.KeyVal entry: data) { 236 req.data(entry); 237 } 238 return this; 239 } 240 241 public Connection.KeyVal data(String key) { 242 Validate.notEmpty(key, "Data key must not be empty"); 243 for (Connection.KeyVal keyVal : request().data()) { 244 if (keyVal.key().equals(key)) 245 return keyVal; 246 } 247 return null; 248 } 249 250 public Connection requestBody(String body) { 251 req.requestBody(body); 252 return this; 253 } 254 255 public Connection header(String name, String value) { 256 req.header(name, value); 257 return this; 258 } 259 260 public Connection headers(Map<String,String> headers) { 261 Validate.notNull(headers, "Header map must not be null"); 262 for (Map.Entry<String,String> entry : headers.entrySet()) { 263 req.header(entry.getKey(),entry.getValue()); 264 } 265 return this; 266 } 267 268 public Connection cookie(String name, String value) { 269 req.cookie(name, value); 270 return this; 271 } 272 273 public Connection cookies(Map<String, String> cookies) { 274 Validate.notNull(cookies, "Cookie map must not be null"); 275 for (Map.Entry<String, String> entry : cookies.entrySet()) { 276 req.cookie(entry.getKey(), entry.getValue()); 277 } 278 return this; 279 } 280 281 public Connection parser(Parser parser) { 282 req.parser(parser); 283 return this; 284 } 285 286 public Document get() throws IOException { 287 req.method(Method.GET); 288 execute(); 289 return res.parse(); 290 } 291 292 public Document post() throws IOException { 293 req.method(Method.POST); 294 execute(); 295 return res.parse(); 296 } 297 298 public Connection.Response execute() throws IOException { 299 res = Response.execute(req); 300 return res; 301 } 302 303 public Connection.Request request() { 304 return req; 305 } 306 307 public Connection request(Connection.Request request) { 308 req = request; 309 return this; 310 } 311 312 public Connection.Response response() { 313 return res; 314 } 315 316 public Connection response(Connection.Response response) { 317 res = response; 318 return this; 319 } 320 321 public Connection postDataCharset(String charset) { 322 req.postDataCharset(charset); 323 return this; 324 } 325 326 @SuppressWarnings({"unchecked"}) 327 private static abstract class Base<T extends Connection.Base> implements Connection.Base<T> { 328 URL url; 329 Method method; 330 Map<String, List<String>> headers; 331 Map<String, String> cookies; 332 333 private Base() { 334 headers = new LinkedHashMap<>(); 335 cookies = new LinkedHashMap<>(); 336 } 337 338 public URL url() { 339 return url; 340 } 341 342 public T url(URL url) { 343 Validate.notNull(url, "URL must not be null"); 344 this.url = url; 345 return (T) this; 346 } 347 348 public Method method() { 349 return method; 350 } 351 352 public T method(Method method) { 353 Validate.notNull(method, "Method must not be null"); 354 this.method = method; 355 return (T) this; 356 } 357 358 public String header(String name) { 359 Validate.notNull(name, "Header name must not be null"); 360 List<String> vals = getHeadersCaseInsensitive(name); 361 if (vals.size() > 0) { 362 // https://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.2 363 return StringUtil.join(vals, ", "); 364 } 365 366 return null; 367 } 368 369 @Override 370 public T addHeader(String name, String value) { 371 Validate.notEmpty(name); 372 value = value == null ? "" : value; 373 374 List<String> values = headers(name); 375 if (values.isEmpty()) { 376 values = new ArrayList<>(); 377 headers.put(name, values); 378 } 379 values.add(fixHeaderEncoding(value)); 380 381 return (T) this; 382 } 383 384 @Override 385 public List<String> headers(String name) { 386 Validate.notEmpty(name); 387 return getHeadersCaseInsensitive(name); 388 } 389 390 private static String fixHeaderEncoding(String val) { 391 try { 392 byte[] bytes = val.getBytes("ISO-8859-1"); 393 if (!looksLikeUtf8(bytes)) 394 return val; 395 return new String(bytes, "UTF-8"); 396 } catch (UnsupportedEncodingException e) { 397 // shouldn't happen as these both always exist 398 return val; 399 } 400 } 401 402 private static boolean looksLikeUtf8(byte[] input) { 403 int i = 0; 404 // BOM: 405 if (input.length >= 3 && (input[0] & 0xFF) == 0xEF 406 && (input[1] & 0xFF) == 0xBB & (input[2] & 0xFF) == 0xBF) { 407 i = 3; 408 } 409 410 int end; 411 for (int j = input.length; i < j; ++i) { 412 int o = input[i]; 413 if ((o & 0x80) == 0) { 414 continue; // ASCII 415 } 416 417 // UTF-8 leading: 418 if ((o & 0xE0) == 0xC0) { 419 end = i + 1; 420 } else if ((o & 0xF0) == 0xE0) { 421 end = i + 2; 422 } else if ((o & 0xF8) == 0xF0) { 423 end = i + 3; 424 } else { 425 return false; 426 } 427 428 while (i < end) { 429 i++; 430 o = input[i]; 431 if ((o & 0xC0) != 0x80) { 432 return false; 433 } 434 } 435 } 436 return true; 437 } 438 439 public T header(String name, String value) { 440 Validate.notEmpty(name, "Header name must not be empty"); 441 removeHeader(name); // ensures we don't get an "accept-encoding" and a "Accept-Encoding" 442 addHeader(name, value); 443 return (T) this; 444 } 445 446 public boolean hasHeader(String name) { 447 Validate.notEmpty(name, "Header name must not be empty"); 448 return getHeadersCaseInsensitive(name).size() != 0; 449 } 450 451 /** 452 * Test if the request has a header with this value (case insensitive). 453 */ 454 public boolean hasHeaderWithValue(String name, String value) { 455 Validate.notEmpty(name); 456 Validate.notEmpty(value); 457 List<String> values = headers(name); 458 for (String candidate : values) { 459 if (value.equalsIgnoreCase(candidate)) 460 return true; 461 } 462 return false; 463 } 464 465 public T removeHeader(String name) { 466 Validate.notEmpty(name, "Header name must not be empty"); 467 Map.Entry<String, List<String>> entry = scanHeaders(name); // remove is case insensitive too 468 if (entry != null) 469 headers.remove(entry.getKey()); // ensures correct case 470 return (T) this; 471 } 472 473 public Map<String, String> headers() { 474 LinkedHashMap<String, String> map = new LinkedHashMap<>(headers.size()); 475 for (Map.Entry<String, List<String>> entry : headers.entrySet()) { 476 String header = entry.getKey(); 477 List<String> values = entry.getValue(); 478 if (values.size() > 0) 479 map.put(header, values.get(0)); 480 } 481 return map; 482 } 483 484 @Override 485 public Map<String, List<String>> multiHeaders() { 486 return headers; 487 } 488 489 private List<String> getHeadersCaseInsensitive(String name) { 490 Validate.notNull(name); 491 492 for (Map.Entry<String, List<String>> entry : headers.entrySet()) { 493 if (name.equalsIgnoreCase(entry.getKey())) 494 return entry.getValue(); 495 } 496 497 return Collections.emptyList(); 498 } 499 500 private Map.Entry<String, List<String>> scanHeaders(String name) { 501 String lc = lowerCase(name); 502 for (Map.Entry<String, List<String>> entry : headers.entrySet()) { 503 if (lowerCase(entry.getKey()).equals(lc)) 504 return entry; 505 } 506 return null; 507 } 508 509 public String cookie(String name) { 510 Validate.notEmpty(name, "Cookie name must not be empty"); 511 return cookies.get(name); 512 } 513 514 public T cookie(String name, String value) { 515 Validate.notEmpty(name, "Cookie name must not be empty"); 516 Validate.notNull(value, "Cookie value must not be null"); 517 cookies.put(name, value); 518 return (T) this; 519 } 520 521 public boolean hasCookie(String name) { 522 Validate.notEmpty(name, "Cookie name must not be empty"); 523 return cookies.containsKey(name); 524 } 525 526 public T removeCookie(String name) { 527 Validate.notEmpty(name, "Cookie name must not be empty"); 528 cookies.remove(name); 529 return (T) this; 530 } 531 532 public Map<String, String> cookies() { 533 return cookies; 534 } 535 } 536 537 public static class Request extends HttpConnection.Base<Connection.Request> implements Connection.Request { 538 private Proxy proxy; // nullable 539 private int timeoutMilliseconds; 540 private int maxBodySizeBytes; 541 private boolean followRedirects; 542 private Collection<Connection.KeyVal> data; 543 private String body = null; 544 private boolean ignoreHttpErrors = false; 545 private boolean ignoreContentType = false; 546 private Parser parser; 547 private boolean parserDefined = false; // called parser(...) vs initialized in ctor 548 private boolean validateTSLCertificates = true; 549 private String postDataCharset = DataUtil.defaultCharset; 550 551 Request() { 552 timeoutMilliseconds = 30000; // 30 seconds 553 maxBodySizeBytes = 1024 * 1024; // 1MB 554 followRedirects = true; 555 data = new ArrayList<>(); 556 method = Method.GET; 557 addHeader("Accept-Encoding", "gzip"); 558 addHeader(USER_AGENT, DEFAULT_UA); 559 parser = Parser.htmlParser(); 560 } 561 562 public Proxy proxy() { 563 return proxy; 564 } 565 566 public Request proxy(Proxy proxy) { 567 this.proxy = proxy; 568 return this; 569 } 570 571 public Request proxy(String host, int port) { 572 this.proxy = new Proxy(Proxy.Type.HTTP, InetSocketAddress.createUnresolved(host, port)); 573 return this; 574 } 575 576 public int timeout() { 577 return timeoutMilliseconds; 578 } 579 580 public Request timeout(int millis) { 581 Validate.isTrue(millis >= 0, "Timeout milliseconds must be 0 (infinite) or greater"); 582 timeoutMilliseconds = millis; 583 return this; 584 } 585 586 public int maxBodySize() { 587 return maxBodySizeBytes; 588 } 589 590 public Connection.Request maxBodySize(int bytes) { 591 Validate.isTrue(bytes >= 0, "maxSize must be 0 (unlimited) or larger"); 592 maxBodySizeBytes = bytes; 593 return this; 594 } 595 596 public boolean followRedirects() { 597 return followRedirects; 598 } 599 600 public Connection.Request followRedirects(boolean followRedirects) { 601 this.followRedirects = followRedirects; 602 return this; 603 } 604 605 public boolean ignoreHttpErrors() { 606 return ignoreHttpErrors; 607 } 608 609 public boolean validateTLSCertificates() { 610 return validateTSLCertificates; 611 } 612 613 public void validateTLSCertificates(boolean value) { 614 validateTSLCertificates = value; 615 } 616 617 public Connection.Request ignoreHttpErrors(boolean ignoreHttpErrors) { 618 this.ignoreHttpErrors = ignoreHttpErrors; 619 return this; 620 } 621 622 public boolean ignoreContentType() { 623 return ignoreContentType; 624 } 625 626 public Connection.Request ignoreContentType(boolean ignoreContentType) { 627 this.ignoreContentType = ignoreContentType; 628 return this; 629 } 630 631 public Request data(Connection.KeyVal keyval) { 632 Validate.notNull(keyval, "Key val must not be null"); 633 data.add(keyval); 634 return this; 635 } 636 637 public Collection<Connection.KeyVal> data() { 638 return data; 639 } 640 641 public Connection.Request requestBody(String body) { 642 this.body = body; 643 return this; 644 } 645 646 public String requestBody() { 647 return body; 648 } 649 650 public Request parser(Parser parser) { 651 this.parser = parser; 652 parserDefined = true; 653 return this; 654 } 655 656 public Parser parser() { 657 return parser; 658 } 659 660 public Connection.Request postDataCharset(String charset) { 661 Validate.notNull(charset, "Charset must not be null"); 662 if (!Charset.isSupported(charset)) throw new IllegalCharsetNameException(charset); 663 this.postDataCharset = charset; 664 return this; 665 } 666 667 public String postDataCharset() { 668 return postDataCharset; 669 } 670 } 671 672 public static class Response extends HttpConnection.Base<Connection.Response> implements Connection.Response { 673 private static final int MAX_REDIRECTS = 20; 674 private static SSLSocketFactory sslSocketFactory; 675 private static final String LOCATION = "Location"; 676 private int statusCode; 677 private String statusMessage; 678 private ByteBuffer byteData; 679 private InputStream bodyStream; 680 private String charset; 681 private String contentType; 682 private boolean executed = false; 683 private boolean inputStreamRead = false; 684 private int numRedirects = 0; 685 private Connection.Request req; 686 687 /* 688 * Matches XML content types (like text/xml, application/xhtml+xml;charset=UTF8, etc) 689 */ 690 private static final Pattern xmlContentTypeRxp = Pattern.compile("(application|text)/\\w*\\+?xml.*"); 691 692 Response() { 693 super(); 694 } 695 696 private Response(Response previousResponse) throws IOException { 697 super(); 698 if (previousResponse != null) { 699 numRedirects = previousResponse.numRedirects + 1; 700 if (numRedirects >= MAX_REDIRECTS) 701 throw new IOException(String.format("Too many redirects occurred trying to load URL %s", previousResponse.url())); 702 } 703 } 704 705 static Response execute(Connection.Request req) throws IOException { 706 return execute(req, null); 707 } 708 709 static Response execute(Connection.Request req, Response previousResponse) throws IOException { 710 Validate.notNull(req, "Request must not be null"); 711 String protocol = req.url().getProtocol(); 712 if (!protocol.equals("http") && !protocol.equals("https")) 713 throw new MalformedURLException("Only http & https protocols supported"); 714 final boolean methodHasBody = req.method().hasBody(); 715 final boolean hasRequestBody = req.requestBody() != null; 716 if (!methodHasBody) 717 Validate.isFalse(hasRequestBody, "Cannot set a request body for HTTP method " + req.method()); 718 719 // set up the request for execution 720 String mimeBoundary = null; 721 if (req.data().size() > 0 && (!methodHasBody || hasRequestBody)) 722 serialiseRequestUrl(req); 723 else if (methodHasBody) 724 mimeBoundary = setOutputContentType(req); 725 726 long startTime = System.nanoTime(); 727 HttpURLConnection conn = createConnection(req); 728 Response res; 729 try { 730 conn.connect(); 731 if (conn.getDoOutput()) 732 writePost(req, conn.getOutputStream(), mimeBoundary); 733 734 int status = conn.getResponseCode(); 735 res = new Response(previousResponse); 736 res.setupFromConnection(conn, previousResponse); 737 res.req = req; 738 739 // redirect if there's a location header (from 3xx, or 201 etc) 740 if (res.hasHeader(LOCATION) && req.followRedirects()) { 741 if (status != HTTP_TEMP_REDIR) { 742 req.method(Method.GET); // always redirect with a get. any data param from original req are dropped. 743 req.data().clear(); 744 req.requestBody(null); 745 req.removeHeader(CONTENT_TYPE); 746 } 747 748 String location = res.header(LOCATION); 749 if (location != null && location.startsWith("http:/") && location.charAt(6) != '/') // fix broken Location: http:/temp/AAG_New/en/index.php 750 location = location.substring(6); 751 URL redir = StringUtil.resolve(req.url(), location); 752 req.url(encodeUrl(redir)); 753 754 for (Map.Entry<String, String> cookie : res.cookies.entrySet()) { // add response cookies to request (for e.g. login posts) 755 req.cookie(cookie.getKey(), cookie.getValue()); 756 } 757 return execute(req, res); 758 } 759 if ((status < 200 || status >= 400) && !req.ignoreHttpErrors()) 760 throw new HttpStatusException("HTTP error fetching URL", status, req.url().toString()); 761 762 // check that we can handle the returned content type; if not, abort before fetching it 763 String contentType = res.contentType(); 764 if (contentType != null 765 && !req.ignoreContentType() 766 && !contentType.startsWith("text/") 767 && !xmlContentTypeRxp.matcher(contentType).matches() 768 ) 769 throw new UnsupportedMimeTypeException("Unhandled content type. Must be text/*, application/xml, or application/xhtml+xml", 770 contentType, req.url().toString()); 771 772 // switch to the XML parser if content type is xml and not parser not explicitly set 773 if (contentType != null && xmlContentTypeRxp.matcher(contentType).matches()) { 774 // only flip it if a HttpConnection.Request (i.e. don't presume other impls want it): 775 if (req instanceof HttpConnection.Request && !((Request) req).parserDefined) { 776 req.parser(Parser.xmlParser()); 777 } 778 } 779 780 res.charset = DataUtil.getCharsetFromContentType(res.contentType); // may be null, readInputStream deals with it 781 if (conn.getContentLength() != 0 && req.method() != HEAD) { // -1 means unknown, chunked. sun throws an IO exception on 500 response with no content when trying to read body 782 res.bodyStream = null; 783 res.bodyStream = conn.getErrorStream() != null ? conn.getErrorStream() : conn.getInputStream(); 784 if (res.hasHeaderWithValue(CONTENT_ENCODING, "gzip")) 785 res.bodyStream = new GZIPInputStream(res.bodyStream); 786 res.bodyStream = ConstrainableInputStream 787 .wrap(res.bodyStream, DataUtil.bufferSize, req.maxBodySize()) 788 .timeout(startTime, req.timeout()) 789 ; 790 } else { 791 res.byteData = DataUtil.emptyByteBuffer(); 792 } 793 } catch (IOException e){ 794 // per Java's documentation, this is not necessary, and precludes keepalives. However in practise, 795 // connection errors will not be released quickly enough and can cause a too many open files error. 796 conn.disconnect(); 797 throw e; 798 } 799 800 res.executed = true; 801 return res; 802 } 803 804 public int statusCode() { 805 return statusCode; 806 } 807 808 public String statusMessage() { 809 return statusMessage; 810 } 811 812 public String charset() { 813 return charset; 814 } 815 816 public Response charset(String charset) { 817 this.charset = charset; 818 return this; 819 } 820 821 public String contentType() { 822 return contentType; 823 } 824 825 public Document parse() throws IOException { 826 Validate.isTrue(executed, "Request must be executed (with .execute(), .get(), or .post() before parsing response"); 827 if (byteData != null) { // bytes have been read in to the buffer, parse that 828 bodyStream = new ByteArrayInputStream(byteData.array()); 829 inputStreamRead = false; // ok to reparse if in bytes 830 } 831 Validate.isFalse(inputStreamRead, "Input stream already read and parsed, cannot re-read."); 832 Document doc = DataUtil.parseInputStream(bodyStream, charset, url.toExternalForm(), req.parser()); 833 charset = doc.outputSettings().charset().name(); // update charset from meta-equiv, possibly 834 inputStreamRead = true; 835 safeClose(); 836 return doc; 837 } 838 839 private void prepareByteData() { 840 Validate.isTrue(executed, "Request must be executed (with .execute(), .get(), or .post() before getting response body"); 841 if (byteData == null) { 842 Validate.isFalse(inputStreamRead, "Request has already been read (with .parse())"); 843 try { 844 byteData = DataUtil.readToByteBuffer(bodyStream, req.maxBodySize()); 845 } catch (IOException e) { 846 throw new UncheckedIOException(e); 847 } finally { 848 inputStreamRead = true; 849 safeClose(); 850 } 851 } 852 } 853 854 public String body() { 855 prepareByteData(); 856 // charset gets set from header on execute, and from meta-equiv on parse. parse may not have happened yet 857 String body; 858 if (charset == null) 859 body = Charset.forName(DataUtil.defaultCharset).decode(byteData).toString(); 860 else 861 body = Charset.forName(charset).decode(byteData).toString(); 862 ((Buffer)byteData).rewind(); // cast to avoid covariant return type change in jdk9 863 return body; 864 } 865 866 public byte[] bodyAsBytes() { 867 prepareByteData(); 868 return byteData.array(); 869 } 870 871 @Override 872 public Connection.Response bufferUp() { 873 prepareByteData(); 874 return this; 875 } 876 877 @Override 878 public BufferedInputStream bodyStream() { 879 Validate.isTrue(executed, "Request must be executed (with .execute(), .get(), or .post() before getting response body"); 880 Validate.isFalse(inputStreamRead, "Request has already been read"); 881 inputStreamRead = true; 882 return ConstrainableInputStream.wrap(bodyStream, DataUtil.bufferSize, req.maxBodySize()); 883 } 884 885 // set up connection defaults, and details from request 886 private static HttpURLConnection createConnection(Connection.Request req) throws IOException { 887 final HttpURLConnection conn = (HttpURLConnection) ( 888 req.proxy() == null ? 889 req.url().openConnection() : 890 req.url().openConnection(req.proxy()) 891 ); 892 893 conn.setRequestMethod(req.method().name()); 894 conn.setInstanceFollowRedirects(false); // don't rely on native redirection support 895 conn.setConnectTimeout(req.timeout()); 896 conn.setReadTimeout(req.timeout() / 2); // gets reduced after connection is made and status is read 897 898 if (conn instanceof HttpsURLConnection) { 899 if (!req.validateTLSCertificates()) { 900 initUnSecureTSL(); 901 ((HttpsURLConnection)conn).setSSLSocketFactory(sslSocketFactory); 902 ((HttpsURLConnection)conn).setHostnameVerifier(getInsecureVerifier()); 903 } 904 } 905 906 if (req.method().hasBody()) 907 conn.setDoOutput(true); 908 if (req.cookies().size() > 0) 909 conn.addRequestProperty("Cookie", getRequestCookieString(req)); 910 for (Map.Entry<String, List<String>> header : req.multiHeaders().entrySet()) { 911 for (String value : header.getValue()) { 912 conn.addRequestProperty(header.getKey(), value); 913 } 914 } 915 return conn; 916 } 917 918 /** 919 * Call on completion of stream read, to close the body (or error) stream 920 */ 921 private void safeClose() { 922 if (bodyStream != null) { 923 try { 924 bodyStream.close(); 925 } catch (IOException e) { 926 // no-op 927 } finally { 928 bodyStream = null; 929 } 930 } 931 } 932 933 /** 934 * Instantiate Hostname Verifier that does nothing. 935 * This is used for connections with disabled SSL certificates validation. 936 * 937 * 938 * @return Hostname Verifier that does nothing and accepts all hostnames 939 */ 940 private static HostnameVerifier getInsecureVerifier() { 941 return new HostnameVerifier() { 942 public boolean verify(String urlHostName, SSLSession session) { 943 return true; 944 } 945 }; 946 } 947 948 /** 949 * Initialise Trust manager that does not validate certificate chains and 950 * add it to current SSLContext. 951 * <p/> 952 * please not that this method will only perform action if sslSocketFactory is not yet 953 * instantiated. 954 * 955 * @throws IOException on SSL init errors 956 */ 957 private static synchronized void initUnSecureTSL() throws IOException { 958 if (sslSocketFactory == null) { 959 // Create a trust manager that does not validate certificate chains 960 final TrustManager[] trustAllCerts = new TrustManager[]{new X509TrustManager() { 961 962 public void checkClientTrusted(final X509Certificate[] chain, final String authType) { 963 } 964 965 public void checkServerTrusted(final X509Certificate[] chain, final String authType) { 966 } 967 968 public X509Certificate[] getAcceptedIssuers() { 969 return null; 970 } 971 }}; 972 973 // Install the all-trusting trust manager 974 final SSLContext sslContext; 975 try { 976 sslContext = SSLContext.getInstance("SSL"); 977 sslContext.init(null, trustAllCerts, new java.security.SecureRandom()); 978 // Create an ssl socket factory with our all-trusting manager 979 sslSocketFactory = sslContext.getSocketFactory(); 980 } catch (NoSuchAlgorithmException | KeyManagementException e) { 981 throw new IOException("Can't create unsecure trust manager"); 982 } 983 } 984 985 } 986 987 // set up url, method, header, cookies 988 private void setupFromConnection(HttpURLConnection conn, Connection.Response previousResponse) throws IOException { 989 method = Method.valueOf(conn.getRequestMethod()); 990 url = conn.getURL(); 991 statusCode = conn.getResponseCode(); 992 statusMessage = conn.getResponseMessage(); 993 contentType = conn.getContentType(); 994 995 Map<String, List<String>> resHeaders = createHeaderMap(conn); 996 processResponseHeaders(resHeaders); 997 998 // if from a redirect, map previous response cookies into this response 999 if (previousResponse != null) { 1000 for (Map.Entry<String, String> prevCookie : previousResponse.cookies().entrySet()) { 1001 if (!hasCookie(prevCookie.getKey())) 1002 cookie(prevCookie.getKey(), prevCookie.getValue()); 1003 } 1004 } 1005 } 1006 1007 private static LinkedHashMap<String, List<String>> createHeaderMap(HttpURLConnection conn) { 1008 // the default sun impl of conn.getHeaderFields() returns header values out of order 1009 final LinkedHashMap<String, List<String>> headers = new LinkedHashMap<>(); 1010 int i = 0; 1011 while (true) { 1012 final String key = conn.getHeaderFieldKey(i); 1013 final String val = conn.getHeaderField(i); 1014 if (key == null && val == null) 1015 break; 1016 i++; 1017 if (key == null || val == null) 1018 continue; // skip http1.1 line 1019 1020 if (headers.containsKey(key)) 1021 headers.get(key).add(val); 1022 else { 1023 final ArrayList<String> vals = new ArrayList<>(); 1024 vals.add(val); 1025 headers.put(key, vals); 1026 } 1027 } 1028 return headers; 1029 } 1030 1031 void processResponseHeaders(Map<String, List<String>> resHeaders) { 1032 for (Map.Entry<String, List<String>> entry : resHeaders.entrySet()) { 1033 String name = entry.getKey(); 1034 if (name == null) 1035 continue; // http/1.1 line 1036 1037 List<String> values = entry.getValue(); 1038 if (name.equalsIgnoreCase("Set-Cookie")) { 1039 for (String value : values) { 1040 if (value == null) 1041 continue; 1042 TokenQueue cd = new TokenQueue(value); 1043 String cookieName = cd.chompTo("=").trim(); 1044 String cookieVal = cd.consumeTo(";").trim(); 1045 // ignores path, date, domain, validateTLSCertificates et al. req'd? 1046 // name not blank, value not null 1047 if (cookieName.length() > 0) 1048 cookie(cookieName, cookieVal); 1049 } 1050 } 1051 for (String value : values) { 1052 addHeader(name, value); 1053 } 1054 } 1055 } 1056 1057 private static String setOutputContentType(final Connection.Request req) { 1058 String bound = null; 1059 if (req.hasHeader(CONTENT_TYPE)) { 1060 // no-op; don't add content type as already set (e.g. for requestBody()) 1061 // todo - if content type already set, we could add charset or boundary if those aren't included 1062 } 1063 else if (needsMultipart(req)) { 1064 bound = DataUtil.mimeBoundary(); 1065 req.header(CONTENT_TYPE, MULTIPART_FORM_DATA + "; boundary=" + bound); 1066 } else { 1067 req.header(CONTENT_TYPE, FORM_URL_ENCODED + "; charset=" + req.postDataCharset()); 1068 } 1069 return bound; 1070 } 1071 1072 private static void writePost(final Connection.Request req, final OutputStream outputStream, final String bound) throws IOException { 1073 final Collection<Connection.KeyVal> data = req.data(); 1074 final BufferedWriter w = new BufferedWriter(new OutputStreamWriter(outputStream, req.postDataCharset())); 1075 1076 if (bound != null) { 1077 // boundary will be set if we're in multipart mode 1078 for (Connection.KeyVal keyVal : data) { 1079 w.write("--"); 1080 w.write(bound); 1081 w.write("\r\n"); 1082 w.write("Content-Disposition: form-data; name=\""); 1083 w.write(encodeMimeName(keyVal.key())); // encodes " to %22 1084 w.write("\""); 1085 if (keyVal.hasInputStream()) { 1086 w.write("; filename=\""); 1087 w.write(encodeMimeName(keyVal.value())); 1088 w.write("\"\r\nContent-Type: "); 1089 w.write(keyVal.contentType() != null ? keyVal.contentType() : DefaultUploadType); 1090 w.write("\r\n\r\n"); 1091 w.flush(); // flush 1092 DataUtil.crossStreams(keyVal.inputStream(), outputStream); 1093 outputStream.flush(); 1094 } else { 1095 w.write("\r\n\r\n"); 1096 w.write(keyVal.value()); 1097 } 1098 w.write("\r\n"); 1099 } 1100 w.write("--"); 1101 w.write(bound); 1102 w.write("--"); 1103 } else if (req.requestBody() != null) { 1104 // data will be in query string, we're sending a plaintext body 1105 w.write(req.requestBody()); 1106 } 1107 else { 1108 // regular form data (application/x-www-form-urlencoded) 1109 boolean first = true; 1110 for (Connection.KeyVal keyVal : data) { 1111 if (!first) 1112 w.append('&'); 1113 else 1114 first = false; 1115 1116 w.write(URLEncoder.encode(keyVal.key(), req.postDataCharset())); 1117 w.write('='); 1118 w.write(URLEncoder.encode(keyVal.value(), req.postDataCharset())); 1119 } 1120 } 1121 w.close(); 1122 } 1123 1124 private static String getRequestCookieString(Connection.Request req) { 1125 StringBuilder sb = StringUtil.stringBuilder(); 1126 boolean first = true; 1127 for (Map.Entry<String, String> cookie : req.cookies().entrySet()) { 1128 if (!first) 1129 sb.append("; "); 1130 else 1131 first = false; 1132 sb.append(cookie.getKey()).append('=').append(cookie.getValue()); 1133 // todo: spec says only ascii, no escaping / encoding defined. validate on set? or escape somehow here? 1134 } 1135 return sb.toString(); 1136 } 1137 1138 // for get url reqs, serialise the data map into the url 1139 private static void serialiseRequestUrl(Connection.Request req) throws IOException { 1140 URL in = req.url(); 1141 StringBuilder url = StringUtil.stringBuilder(); 1142 boolean first = true; 1143 // reconstitute the query, ready for appends 1144 url 1145 .append(in.getProtocol()) 1146 .append("://") 1147 .append(in.getAuthority()) // includes host, port 1148 .append(in.getPath()) 1149 .append("?"); 1150 if (in.getQuery() != null) { 1151 url.append(in.getQuery()); 1152 first = false; 1153 } 1154 for (Connection.KeyVal keyVal : req.data()) { 1155 Validate.isFalse(keyVal.hasInputStream(), "InputStream data not supported in URL query string."); 1156 if (!first) 1157 url.append('&'); 1158 else 1159 first = false; 1160 url 1161 .append(URLEncoder.encode(keyVal.key(), DataUtil.defaultCharset)) 1162 .append('=') 1163 .append(URLEncoder.encode(keyVal.value(), DataUtil.defaultCharset)); 1164 } 1165 req.url(new URL(url.toString())); 1166 req.data().clear(); // moved into url as get params 1167 } 1168 } 1169 1170 private static boolean needsMultipart(Connection.Request req) { 1171 // multipart mode, for files. add the header if we see something with an inputstream, and return a non-null boundary 1172 boolean needsMulti = false; 1173 for (Connection.KeyVal keyVal : req.data()) { 1174 if (keyVal.hasInputStream()) { 1175 needsMulti = true; 1176 break; 1177 } 1178 } 1179 return needsMulti; 1180 } 1181 1182 public static class KeyVal implements Connection.KeyVal { 1183 private String key; 1184 private String value; 1185 private InputStream stream; 1186 private String contentType; 1187 1188 public static KeyVal create(String key, String value) { 1189 return new KeyVal().key(key).value(value); 1190 } 1191 1192 public static KeyVal create(String key, String filename, InputStream stream) { 1193 return new KeyVal().key(key).value(filename).inputStream(stream); 1194 } 1195 1196 private KeyVal() {} 1197 1198 public KeyVal key(String key) { 1199 Validate.notEmpty(key, "Data key must not be empty"); 1200 this.key = key; 1201 return this; 1202 } 1203 1204 public String key() { 1205 return key; 1206 } 1207 1208 public KeyVal value(String value) { 1209 Validate.notNull(value, "Data value must not be null"); 1210 this.value = value; 1211 return this; 1212 } 1213 1214 public String value() { 1215 return value; 1216 } 1217 1218 public KeyVal inputStream(InputStream inputStream) { 1219 Validate.notNull(value, "Data input stream must not be null"); 1220 this.stream = inputStream; 1221 return this; 1222 } 1223 1224 public InputStream inputStream() { 1225 return stream; 1226 } 1227 1228 public boolean hasInputStream() { 1229 return stream != null; 1230 } 1231 1232 @Override 1233 public Connection.KeyVal contentType(String contentType) { 1234 Validate.notEmpty(contentType); 1235 this.contentType = contentType; 1236 return this; 1237 } 1238 1239 @Override 1240 public String contentType() { 1241 return contentType; 1242 } 1243 1244 @Override 1245 public String toString() { 1246 return key + "=" + value; 1247 } 1248 } 1249}