001package org.jsoup;
002
003import org.jsoup.nodes.Document;
004import org.jsoup.parser.Parser;
005
006import java.io.BufferedInputStream;
007import java.io.IOException;
008import java.io.InputStream;
009import java.net.Proxy;
010import java.net.URL;
011import java.util.Collection;
012import java.util.List;
013import java.util.Map;
014
015/**
016 * A Connection provides a convenient interface to fetch content from the web, and parse it into Documents.
017 * <p>
018 * To get a new Connection, use {@link org.jsoup.Jsoup#connect(String)}. Connections contain {@link Connection.Request}
019 * and {@link Connection.Response} objects. The request objects are reusable as prototype requests.
020 * </p>
021 * <p>
022 * Request configuration can be made using either the shortcut methods in Connection (e.g. {@link #userAgent(String)}),
023 * or by methods in the Connection.Request object directly. All request configuration must be made before the request is
024 * executed.
025 * </p>
026 */
027public interface Connection {
028
029    /**
030     * HTTP request methods supported by a Connection, each flagged with whether requests using it carry a body.
031     */
032    enum Method {
033        GET(false), POST(true), PUT(true), DELETE(false), PATCH(true), HEAD(false), OPTIONS(false), TRACE(false);
034
035        private final boolean hasBody;
036
037        Method(boolean hasBody) {
038            this.hasBody = hasBody;
039        }
040
041        /**
042         * Check if this HTTP method has/needs a request body
043         * @return true if this method is flagged as having a request body (POST, PUT, PATCH); false otherwise
044         */
045        public final boolean hasBody() {
046            return hasBody;
047        }
048    }
049
050    /**
051     * Set the request URL to fetch. The protocol must be HTTP or HTTPS.
052     * @param url URL to connect to, as a {@link URL} object
053     * @return this Connection, for chaining
054     */
055    Connection url(URL url);
056
057    /**
058     * Set the request URL to fetch. The protocol must be HTTP or HTTPS.
059     * @param url URL to connect to, as a String
060     * @return this Connection, for chaining
061     */
062    Connection url(String url);
063
064    /**
065     * Set the proxy to use for this request. Set to <code>null</code> to disable.
066     * @param proxy proxy to use, or <code>null</code> to disable the proxy
067     * @return this Connection, for chaining
068     */
069    Connection proxy(Proxy proxy);
070
071    /**
072     * Set the HTTP proxy to use for this request, identified by its host name and port.
073     * @param host the proxy hostname
074     * @param port the proxy port
075     * @return this Connection, for chaining
076     */
077    Connection proxy(String host, int port);
078
079    /**
080     * Set the request user-agent header.
081     * @param userAgent user-agent header value to send with the request
082     * @return this Connection, for chaining
083     * @see org.jsoup.helper.HttpConnection#DEFAULT_UA
084     */
085    Connection userAgent(String userAgent);
086
087    /**
088     * Set the total request timeout duration. If a timeout occurs, an {@link java.net.SocketTimeoutException} will be thrown.
089     * <p>The default timeout is <b>30 seconds</b> (30,000 millis). A timeout of zero is treated as an infinite timeout.</p>
090     * <p>Note that this timeout specifies the combined maximum duration of the connection time and the time to read
091     * the full response.</p>
092     * @param millis total number of milliseconds (thousandths of a second) allowed for connecting and reading before timing out; zero for no timeout.
093     * @return this Connection, for chaining
094     * @see #maxBodySize(int)
095     */
096    Connection timeout(int millis);
097
098    /**
099     * Set the maximum bytes to read from the (uncompressed) connection into the body, before the connection is closed,
100     * and the input truncated. The default maximum is 1MB. A max size of zero is treated as an infinite amount (bounded
101     * only by your patience and the memory available on your machine).
102     * @param bytes number of bytes to read from the input before truncating; zero for no limit
103     * @return this Connection, for chaining
104     */
105    Connection maxBodySize(int bytes);
106
107    /**
108     * Set the request referrer (aka "referer") header.
109     * @param referrer referrer header value to send with the request
110     * @return this Connection, for chaining
111     */
112    Connection referrer(String referrer);
113
114    /**
115     * Configures the connection to (not) follow server redirects. By default this is <b>true</b>.
116     * @param followRedirects true if server redirects should be followed; false if they should not be.
117     * @return this Connection, for chaining
118     */
119    Connection followRedirects(boolean followRedirects);
120
121    /**
122     * Set the request method to use; any {@link Method} value (e.g. GET or POST). Default is GET.
123     * @param method HTTP request method
124     * @return this Connection, for chaining
125     */
126    Connection method(Method method);
127
128    /**
129     * Configures the connection to not throw exceptions when a HTTP error occurs. (4xx - 5xx, e.g. 404 or 500). By
130     * default this is <b>false</b>; an IOException is thrown if an error is encountered. If set to <b>true</b>, the
131     * response is populated with the error body, and the status message will reflect the error.
132     * @param ignoreHttpErrors true if HTTP errors should be ignored; false (default) if they should cause an IOException.
133     * @return this Connection, for chaining
134     */
135    Connection ignoreHttpErrors(boolean ignoreHttpErrors);
136
137    /**
138     * Ignore the document's Content-Type when parsing the response. By default this is <b>false</b>; an unrecognised
139     * content-type will cause an IOException to be thrown. (This is to prevent producing garbage by attempting to parse
140     * a JPEG binary image, for example.) Set to true to force a parse attempt regardless of content type.
141     * @param ignoreContentType set to true if you would like the content type ignored on parsing the response into a
142     * Document.
143     * @return this Connection, for chaining
144     */
145    Connection ignoreContentType(boolean ignoreContentType);
146
147    /**
148     * Disable/enable TLS certificates validation for HTTPS requests.
149     * <p>
150     * By default this is <b>true</b>; all
151     * connections over HTTPS perform normal validation of certificates, and will abort requests if the provided
152     * certificate does not validate.
153     * </p>
154     * <p>
155     * Some servers use expired, self-generated certificates; or your JDK may not
156     * support SNI hosts. In which case, you may want to disable validation by setting this to <b>false</b>.
157     * </p>
158     * <p>
159     * <b>Be careful</b> and understand why you need to disable these validations.
160     * </p>
161     * @param value true (the default) to validate TLS (SSL) certificates; false to skip validation.
162     * @return this Connection, for chaining
163     */
164    Connection validateTLSCertificates(boolean value);
165
166    /**
167     * Add a request data parameter. Request parameters are sent in the request query string for GETs, and in the
168     * request body for POSTs. A request may have multiple values of the same name.
169     * @param key data key
170     * @param value data value
171     * @return this Connection, for chaining
172     */
173    Connection data(String key, String value);
174
175    /**
176     * Add an input stream as a request data parameter. For GETs, has no effect, but for POSTS this will upload the
177     * input stream.
178     * @param key data key (form item name)
179     * @param filename the name of the file to present to the remote server. Typically just the name, not path,
180     * component.
181     * @param inputStream the input stream to upload, that you probably obtained from a {@link java.io.FileInputStream}.
182     * You must close the InputStream in a {@code finally} block.
183     * @return this Connection, for chaining
184     * @see #data(String, String, InputStream, String) if you want to set the uploaded file's mimetype.
185     */
186    Connection data(String key, String filename, InputStream inputStream);
187
188    /**
189     * Add an input stream as a request data parameter. For GETs, has no effect, but for POSTS this will upload the
190     * input stream.
191     * @param key data key (form item name)
192     * @param filename the name of the file to present to the remote server. Typically just the name, not path,
193     * component.
194     * @param inputStream the input stream to upload, that you probably obtained from a {@link java.io.FileInputStream}.
195     * You must close the InputStream in a {@code finally} block.
196     * @param contentType the Content Type (aka mimetype) to specify for this file.
197     * @return this Connection, for chaining
198     */
199    Connection data(String key, String filename, InputStream inputStream, String contentType);
200
201    /**
202     * Adds all of the supplied data to the request data parameters
203     * @param data collection of data parameters
204     * @return this Connection, for chaining
205     */
206    Connection data(Collection<KeyVal> data);
207
208    /**
209     * Adds all of the supplied data to the request data parameters
210     * @param data map of data parameters
211     * @return this Connection, for chaining
212     */
213    Connection data(Map<String, String> data);
214
215    /**
216     * Add a number of request data parameters. Multiple parameters may be set at once, e.g.: <code>.data("name",
217     * "jsoup", "language", "Java", "language", "English");</code> creates a query string like:
218     * <code>{@literal ?name=jsoup&language=Java&language=English}</code>
219     * @param keyvals a set of key value pairs, given as alternating keys and values.
220     * @return this Connection, for chaining
221     */
222    Connection data(String... keyvals);
223
224    /**
225     * Get the data KeyVal for this key, if any
226     * @param key the data key
227     * @return the KeyVal for this key, or null if not set
228     */
229    KeyVal data(String key);
230
231    /**
232     * Set a POST (or PUT) request body. Useful when a server expects a plain request body, not a set of URL
233     * encoded form key/value pairs. E.g.:
234     * <code><pre>Jsoup.connect(url)
235     * .requestBody(json)
236     * .header("Content-Type", "application/json")
237     * .post();</pre></code>
238     * If any data key/vals are supplied, they will be sent as URL query params.
        * @param body the request body to send
239     * @return this Connection, for chaining
240     */
241    Connection requestBody(String body);
242
243    /**
244     * Set a request header.
245     * @param name header name
246     * @param value header value
247     * @return this Connection, for chaining
248     * @see org.jsoup.Connection.Request#headers()
249     * @see #headers(Map)
250     */
250    Connection header(String name, String value);
251
252    /**
253     * Adds each of the supplied headers to the request.
254     * @param headers map of header name {@literal ->} value pairs
255     * @return this Connection, for chaining
256     * @see org.jsoup.Connection.Request#headers()
257     */
258    Connection headers(Map<String,String> headers);
259
260    /**
261     * Set a cookie to be sent in the request.
262     * @param name name of cookie
263     * @param value value of cookie
264     * @return this Connection, for chaining
        * @see #cookies(Map)
265     */
266    Connection cookie(String name, String value);
267
268    /**
269     * Adds each of the supplied cookies to the request.
270     * @param cookies map of cookie name {@literal ->} value pairs
271     * @return this Connection, for chaining
        * @see #cookie(String, String)
272     */
273    Connection cookies(Map<String, String> cookies);
274
275    /**
276     * Provide an alternate parser to use when parsing the response to a Document. If not set, defaults to the HTML
277     * parser, unless the response content-type is XML, in which case the XML parser is used.
278     * @param parser alternate parser to use in place of the default
279     * @return this Connection, for chaining
280     */
281    Connection parser(Parser parser);
282
283    /**
284     * Sets the default post data character set for x-www-form-urlencoded post data
285     * @param charset name of the character set used to encode post data
286     * @return this Connection, for chaining
287     */
288    Connection postDataCharset(String charset);
289
290    /**
291     * Execute the request as a GET, and parse the result.
292     * @return the response, parsed into a Document
293     * @throws java.net.MalformedURLException if the request URL is not a HTTP or HTTPS URL, or is otherwise malformed
294     * @throws HttpStatusException if the response is not OK and HTTP response errors are not ignored
295     * @throws UnsupportedMimeTypeException if the response mime type is not supported and those errors are not ignored
296     * @throws java.net.SocketTimeoutException if the connection times out
297     * @throws IOException on error
298     */
299    Document get() throws IOException;
300
301    /**
302     * Execute the request as a POST, and parse the result.
303     * @return the response, parsed into a Document
304     * @throws java.net.MalformedURLException if the request URL is not a HTTP or HTTPS URL, or is otherwise malformed
305     * @throws HttpStatusException if the response is not OK and HTTP response errors are not ignored
306     * @throws UnsupportedMimeTypeException if the response mime type is not supported and those errors are not ignored
307     * @throws java.net.SocketTimeoutException if the connection times out
308     * @throws IOException on error
309     */
310    Document post() throws IOException;
311
312    /**
313     * Execute the request, without parsing the result into a Document.
314     * @return a response object
315     * @throws java.net.MalformedURLException if the request URL is not a HTTP or HTTPS URL, or is otherwise malformed
316     * @throws HttpStatusException if the response is not OK and HTTP response errors are not ignored
317     * @throws UnsupportedMimeTypeException if the response mime type is not supported and those errors are not ignored
318     * @throws java.net.SocketTimeoutException if the connection times out
319     * @throws IOException on error
320     */
321    Response execute() throws IOException;
322
323    /**
324     * Get the request object associated with this connection
325     * @return the request object of this connection
326     */
327    Request request();
328
329    /**
330     * Set the connection's request
331     * @param request new request object
332     * @return this Connection, for chaining
333     */
334    Connection request(Request request);
335
336    /**
337     * Get the response, once the request has been executed
338     * @return the response object of this connection
339     */
340    Response response();
341
342    /**
343     * Set the connection's response
344     * @param response new response
345     * @return this Connection, for chaining
346     */
347    Connection response(Response response);
348
349    /**
350     * Common methods for Requests and Responses: URL, method, header and cookie accessors.
351     * @param <T> Type of Base, either Request or Response
352     */
353    interface Base<T extends Base> {
354
355        /**
356         * Get the URL
357         * @return URL
358         */
359        URL url();
360
361        /**
362         * Set the URL
363         * @param url new URL
364         * @return this, for chaining
365         */
366        T url(URL url);
367
368        /**
369         * Get the request method
370         * @return method
371         */
372        Method method();
373
374        /**
375         * Set the request method
376         * @param method new method
377         * @return this, for chaining
378         */
379        T method(Method method);
380
381        /**
382         * Get the value of a header. If there is more than one header value with the same name, the headers are returned
383         * comma separated, per <a href="https://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.2">rfc2616-sec4</a>.
384         * <p>
385         * Header names are case insensitive.
386         * </p>
387         * @param name name of header (case insensitive)
388         * @return value of header, or null if not set.
389         * @see #hasHeader(String)
390         * @see #cookie(String)
391         */
392        String header(String name);
393
394        /**
395         * Get the values of a header.
396         * @param name header name, case insensitive.
397         * @return a list of values for this header, or an empty list if not set.
398         */
399        List<String> headers(String name);
400
401        /**
402         * Set a header. This method will overwrite any existing header with the same case insensitive name. (If there
403         * is more than one value for this header, this method will update the first matching header.)
404         * @param name Name of header
405         * @param value Value of header
406         * @return this, for chaining
407         * @see #addHeader(String, String)
408         */
409        T header(String name, String value);
410
411        /**
412         * Add a header. The header will be added regardless of whether a header with the same name already exists.
413         * @param name Name of new header
414         * @param value Value of new header
415         * @return this, for chaining
416         */
417        T addHeader(String name, String value);
418
419        /**
420         * Check if a header is present
421         * @param name name of header (case insensitive)
422         * @return if the header is present in this request/response
423         */
424        boolean hasHeader(String name);
425
426        /**
427         * Check if a header is present, with the given value
428         * @param name header name (case insensitive)
429         * @param value value (case insensitive)
430         * @return if the header and value pair are set in this req/res
431         */
432        boolean hasHeaderWithValue(String name, String value);
433
434        /**
435         * Remove headers by name. If there is more than one header with this name, they will all be removed.
436         * @param name name of header to remove (case insensitive)
437         * @return this, for chaining
438         */
439        T removeHeader(String name);
440
441        /**
442         * Retrieve all of the request/response header names and corresponding values as a map. For headers with multiple
443         * values, only the first header is returned.
444         * <p>Note that this is a view of the headers only, and changes made to this map will not be reflected in the
445         * request/response object.</p>
446         * @return headers
447         * @see #multiHeaders()
448
449         */
450        Map<String, String> headers();
451
452        /**
453         * Retrieve all of the headers, keyed by the header name, and with a list of values per header.
454         * @return a map of header name to the list of values for that header.
455         */
456        Map<String, List<String>> multiHeaders();
457
458        /**
459         * Get a cookie value by name from this request/response.
460         * <p>
461         * Response objects have a simplified cookie model. Each cookie set in the response is added to the response
462         * object's cookie key=value map. The cookie's path, domain, and expiry date are ignored.
463         * </p>
464         * @param name name of cookie to retrieve.
465         * @return value of cookie, or null if not set
466         */
467        String cookie(String name);
468
469        /**
470         * Set a cookie in this request/response.
471         * @param name name of cookie
472         * @param value value of cookie
473         * @return this, for chaining
474         */
475        T cookie(String name, String value);
476
477        /**
478         * Check if a cookie is present
479         * @param name name of cookie
480         * @return if the cookie is present in this request/response
481         */
482        boolean hasCookie(String name);
483
484        /**
485         * Remove a cookie by name
486         * @param name name of cookie to remove
487         * @return this, for chaining
488         */
489        T removeCookie(String name);
490
491        /**
492         * Retrieve all of the request/response cookies as a map
493         * @return cookies, as a map of cookie name to value
494         */
495        Map<String, String> cookies();
496    }
497
498    /**
499     * Represents a HTTP request.
500     */
501    interface Request extends Base<Request> {
502        /**
503         * Get the proxy used for this request.
504         * @return the proxy; <code>null</code> if not enabled.
505         */
506        Proxy proxy();
507
508        /**
509         * Update the proxy for this request.
510         * @param proxy the proxy to use; <code>null</code> to disable.
511         * @return this Request, for chaining
512         */
513        Request proxy(Proxy proxy);
514
515        /**
516         * Set the HTTP proxy to use for this request.
517         * @param host the proxy hostname
518         * @param port the proxy port
519         * @return this Request, for chaining
520         */
521        Request proxy(String host, int port);
522
523        /**
524         * Get the request timeout, in milliseconds.
525         * @return the timeout in milliseconds.
526         */
527        int timeout();
528
529        /**
530         * Update the request timeout.
531         * @param millis timeout, in milliseconds
532         * @return this Request, for chaining
533         */
534        Request timeout(int millis);
535
536        /**
537         * Get the maximum body size, in bytes.
538         * @return the maximum body size, in bytes.
539         */
540        int maxBodySize();
541
542        /**
543         * Update the maximum body size, in bytes.
544         * @param bytes maximum body size, in bytes.
545         * @return this Request, for chaining
546         */
547        Request maxBodySize(int bytes);
548
549        /**
550         * Get the current followRedirects configuration.
551         * @return true if followRedirects is enabled.
552         */
553        boolean followRedirects();
554
555        /**
556         * Configures the request to (not) follow server redirects. By default this is <b>true</b>.
557         * @param followRedirects true if server redirects should be followed.
558         * @return this Request, for chaining
559         */
560        Request followRedirects(boolean followRedirects);
561
562        /**
563         * Get the current ignoreHttpErrors configuration.
564         * @return true if errors will be ignored; false (default) if HTTP errors will cause an IOException to be
565         * thrown.
566         */
567        boolean ignoreHttpErrors();
568
569        /**
570         * Configures the request to ignore HTTP errors in the response.
571         * @param ignoreHttpErrors set to true to ignore HTTP errors.
572         * @return this Request, for chaining
573         */
574        Request ignoreHttpErrors(boolean ignoreHttpErrors);
575
576        /**
577         * Get the current ignoreContentType configuration.
578         * @return true if invalid content-types will be ignored; false (default) if they will cause an IOException to
579         * be thrown.
580         */
581        boolean ignoreContentType();
582
583        /**
584         * Configures the request to ignore the Content-Type of the response.
585         * @param ignoreContentType set to true to ignore the content type.
586         * @return this Request, for chaining
587         */
588        Request ignoreContentType(boolean ignoreContentType);
589
590        /**
591         * Get the current state of TLS (SSL) certificate validation.
592         * @return true if TLS cert validation enabled
593         */
594        boolean validateTLSCertificates();
595
596        /**
597         * Set TLS certificate validation. Unlike the other setters on Request, this method returns void and so cannot be chained.
598         * @param value set false to ignore TLS (SSL) certificates
599         */
600        void validateTLSCertificates(boolean value);
601
602        /**
603         * Add a data parameter to the request
604         * @param keyval data to add.
605         * @return this Request, for chaining
606         */
607        Request data(KeyVal keyval);
608
609        /**
610         * Get all of the request's data parameters
611         * @return collection of keyvals
612         */
613        Collection<KeyVal> data();
614
615        /**
616         * Set a POST (or PUT) request body. Useful when a server expects a plain request body, not a set of URL
617         * encoded form key/value pairs. E.g.:
618         * <code><pre>Jsoup.connect(url)
619         * .requestBody(json)
620         * .header("Content-Type", "application/json")
621         * .post();</pre></code>
622         * If any data key/vals are supplied, they will be sent as URL query params.
            * @param body the request body to send
623         * @return this Request, for chaining
624         */
625        Request requestBody(String body);
626
627        /**
628         * Get the current request body.
629         * @return null if not set.
630         */
631        String requestBody();
632
633        /**
634         * Specify the parser to use when parsing the document.
635         * @param parser parser to use.
636         * @return this Request, for chaining
637         */
638        Request parser(Parser parser);
639
640        /**
641         * Get the current parser to use when parsing the document.
642         * @return current Parser
643         */
644        Parser parser();
645
646        /**
647         * Sets the post data character set for x-www-form-urlencoded post data
648         * @param charset character set to encode post data
649         * @return this Request, for chaining
650         */
651        Request postDataCharset(String charset);
652
653        /**
654         * Gets the post data character set for x-www-form-urlencoded post data
655         * @return character set to encode post data
656         */
657        String postDataCharset();
658
659    }
660
661    /**
662     * Represents a HTTP response.
663     */
664    interface Response extends Base<Response> {
665
666        /**
667         * Get the status code of the response.
668         * @return status code
669         */
670        int statusCode();
671
672        /**
673         * Get the status message of the response.
674         * @return status message
675         */
676        String statusMessage();
677
678        /**
679         * Get the character set name of the response, derived from the content-type header.
680         * @return character set name
681         */
682        String charset();
683
684        /**
685         * Set / override the response character set. When the document body is parsed it will be with this charset.
686         * @param charset to decode body as
687         * @return this Response, for chaining
688         */
689        Response charset(String charset);
690
691        /**
692         * Get the response content type (e.g. "text/html").
693         * @return the response content type
694         */
695        String contentType();
696
697        /**
698         * Read and parse the body of the response as a Document. If you intend to parse the same response multiple
699         * times, you should {@link #bufferUp()} first.
700         * @return a parsed Document
701         * @throws IOException on error
702         */
703        Document parse() throws IOException;
704
705        /**
706         * Get the body of the response as a plain string.
707         * @return body
708         */
709        String body();
710
711        /**
712         * Get the body of the response as an array of bytes.
713         * @return body bytes
714         */
715        byte[] bodyAsBytes();
716
717        /**
718         * Read the body of the response into a local buffer, so that {@link #parse()} may be called repeatedly on the
719         * same connection response (otherwise, once the response is read, its InputStream will have been drained and
720         * may not be re-read). Calling {@link #body() } or {@link #bodyAsBytes()} has the same effect.
721         * @return this response, for chaining
722         */
723        Response bufferUp();
724
725        /**
726         * Get the body of the response as a (buffered) InputStream. You should close the input stream when you're done with it.
727         * Other body methods (like bufferUp, body, parse, etc) will not work in conjunction with this method.
728         * <p>This method is useful for writing large responses to disk, without buffering them completely into memory first.</p>
729         * @return the response body input stream
730         */
731        BufferedInputStream bodyStream();
732    }
733
734    /**
735     * A Key:Value tuple(+), used for form data. Besides the key and value, a KeyVal may also carry an input stream
        * and a content type, for file uploads.
736     */
737    interface KeyVal {
738
739        /**
740         * Update the key of a keyval
741         * @param key new key
742         * @return this KeyVal, for chaining
743         */
744        KeyVal key(String key);
745
746        /**
747         * Get the key of a keyval
748         * @return the key
749         */
750        String key();
751
752        /**
753         * Update the value of a keyval
754         * @param value the new value
755         * @return this KeyVal, for chaining
756         */
757        KeyVal value(String value);
758
759        /**
760         * Get the value of a keyval
761         * @return the value
762         */
763        String value();
764
765        /**
766         * Add or update an input stream to this keyVal
767         * @param inputStream new input stream
768         * @return this KeyVal, for chaining
769         */
770        KeyVal inputStream(InputStream inputStream);
771
772        /**
773         * Get the input stream associated with this keyval, if any
774         * @return input stream if set, or null
775         */
776        InputStream inputStream();
777
778        /**
779         * Does this keyval have an input stream?
780         * @return true if this keyval does indeed have an input stream
781         */
782        boolean hasInputStream();
783
784        /**
785         * Set the Content Type header used in the MIME body (aka mimetype) when uploading files.
786         * Only useful if {@link #inputStream(InputStream)} is set.
787         * <p>Will default to {@code application/octet-stream}.</p>
788         * @param contentType the new content type
789         * @return this KeyVal, for chaining
790         */
791        KeyVal contentType(String contentType);
792
793        /**
794         * Get the current Content Type, or {@code null} if not set.
795         * @return the current Content Type.
796         */
797        String contentType();
798    }
799}