001package org.jsoup.select; 002 003import org.jsoup.helper.Validate; 004import org.jsoup.nodes.Element; 005import org.jsoup.nodes.FormElement; 006import org.jsoup.nodes.Node; 007 008import java.util.ArrayList; 009import java.util.Arrays; 010import java.util.Collection; 011import java.util.HashSet; 012import java.util.LinkedHashSet; 013import java.util.List; 014 015/** 016 A list of {@link Element}s, with methods that act on every element in the list. 017 <p> 018 To get an {@code Elements} object, use the {@link Element#select(String)} method. 019 </p> 020 021 @author Jonathan Hedley, jonathan@hedley.net */ 022public class Elements extends ArrayList<Element> { 023 public Elements() { 024 } 025 026 public Elements(int initialCapacity) { 027 super(initialCapacity); 028 } 029 030 public Elements(Collection<Element> elements) { 031 super(elements); 032 } 033 034 public Elements(List<Element> elements) { 035 super(elements); 036 } 037 038 public Elements(Element... elements) { 039 super(Arrays.asList(elements)); 040 } 041 042 /** 043 * Creates a deep copy of these elements. 044 * @return a deep copy 045 */ 046 @Override 047 public Elements clone() { 048 Elements clone = new Elements(size()); 049 050 for(Element e : this) 051 clone.add(e.clone()); 052 053 return clone; 054 } 055 056 // attribute methods 057 /** 058 Get an attribute value from the first matched element that has the attribute. 059 @param attributeKey The attribute key. 060 @return The attribute value from the first matched element that has the attribute.. If no elements were matched (isEmpty() == true), 061 or if the no elements have the attribute, returns empty string. 062 @see #hasAttr(String) 063 */ 064 public String attr(String attributeKey) { 065 for (Element element : this) { 066 if (element.hasAttr(attributeKey)) 067 return element.attr(attributeKey); 068 } 069 return ""; 070 } 071 072 /** 073 Checks if any of the matched elements have this attribute defined. 074 @param attributeKey attribute key 075 @return true if any of the elements have the attribute; false if none do. 076 */ 077 public boolean hasAttr(String attributeKey) { 078 for (Element element : this) { 079 if (element.hasAttr(attributeKey)) 080 return true; 081 } 082 return false; 083 } 084 085 /** 086 * Get the attribute value for each of the matched elements. If an element does not have this attribute, no value is 087 * included in the result set for that element. 088 * @param attributeKey the attribute name to return values for. You can add the {@code abs:} prefix to the key to 089 * get absolute URLs from relative URLs, e.g.: {@code doc.select("a").eachAttr("abs:href")} . 090 * @return a list of each element's attribute value for the attribute 091 */ 092 public List<String> eachAttr(String attributeKey) { 093 List<String> attrs = new ArrayList<>(size()); 094 for (Element element : this) { 095 if (element.hasAttr(attributeKey)) 096 attrs.add(element.attr(attributeKey)); 097 } 098 return attrs; 099 } 100 101 /** 102 * Set an attribute on all matched elements. 103 * @param attributeKey attribute key 104 * @param attributeValue attribute value 105 * @return this 106 */ 107 public Elements attr(String attributeKey, String attributeValue) { 108 for (Element element : this) { 109 element.attr(attributeKey, attributeValue); 110 } 111 return this; 112 } 113 114 /** 115 * Remove an attribute from every matched element. 116 * @param attributeKey The attribute to remove. 117 * @return this (for chaining) 118 */ 119 public Elements removeAttr(String attributeKey) { 120 for (Element element : this) { 121 element.removeAttr(attributeKey); 122 } 123 return this; 124 } 125 126 /** 127 Add the class name to every matched element's {@code class} attribute. 128 @param className class name to add 129 @return this 130 */ 131 public Elements addClass(String className) { 132 for (Element element : this) { 133 element.addClass(className); 134 } 135 return this; 136 } 137 138 /** 139 Remove the class name from every matched element's {@code class} attribute, if present. 140 @param className class name to remove 141 @return this 142 */ 143 public Elements removeClass(String className) { 144 for (Element element : this) { 145 element.removeClass(className); 146 } 147 return this; 148 } 149 150 /** 151 Toggle the class name on every matched element's {@code class} attribute. 152 @param className class name to add if missing, or remove if present, from every element. 153 @return this 154 */ 155 public Elements toggleClass(String className) { 156 for (Element element : this) { 157 element.toggleClass(className); 158 } 159 return this; 160 } 161 162 /** 163 Determine if any of the matched elements have this class name set in their {@code class} attribute. 164 @param className class name to check for 165 @return true if any do, false if none do 166 */ 167 public boolean hasClass(String className) { 168 for (Element element : this) { 169 if (element.hasClass(className)) 170 return true; 171 } 172 return false; 173 } 174 175 /** 176 * Get the form element's value of the first matched element. 177 * @return The form element's value, or empty if not set. 178 * @see Element#val() 179 */ 180 public String val() { 181 if (size() > 0) 182 return first().val(); 183 else 184 return ""; 185 } 186 187 /** 188 * Set the form element's value in each of the matched elements. 189 * @param value The value to set into each matched element 190 * @return this (for chaining) 191 */ 192 public Elements val(String value) { 193 for (Element element : this) 194 element.val(value); 195 return this; 196 } 197 198 /** 199 * Get the combined text of all the matched elements. 200 * <p> 201 * Note that it is possible to get repeats if the matched elements contain both parent elements and their own 202 * children, as the Element.text() method returns the combined text of a parent and all its children. 203 * @return string of all text: unescaped and no HTML. 204 * @see Element#text() 205 * @see #eachText() 206 */ 207 public String text() { 208 StringBuilder sb = new StringBuilder(); 209 for (Element element : this) { 210 if (sb.length() != 0) 211 sb.append(" "); 212 sb.append(element.text()); 213 } 214 return sb.toString(); 215 } 216 217 /** 218 Test if any matched Element has any text content, that is not just whitespace. 219 @return true if any element has non-blank text content. 220 @see Element#hasText() 221 */ 222 public boolean hasText() { 223 for (Element element: this) { 224 if (element.hasText()) 225 return true; 226 } 227 return false; 228 } 229 230 /** 231 * Get the text content of each of the matched elements. If an element has no text, then it is not included in the 232 * result. 233 * @return A list of each matched element's text content. 234 * @see Element#text() 235 * @see Element#hasText() 236 * @see #text() 237 */ 238 public List<String> eachText() { 239 ArrayList<String> texts = new ArrayList<>(size()); 240 for (Element el: this) { 241 if (el.hasText()) 242 texts.add(el.text()); 243 } 244 return texts; 245 } 246 247 /** 248 * Get the combined inner HTML of all matched elements. 249 * @return string of all element's inner HTML. 250 * @see #text() 251 * @see #outerHtml() 252 */ 253 public String html() { 254 StringBuilder sb = new StringBuilder(); 255 for (Element element : this) { 256 if (sb.length() != 0) 257 sb.append("\n"); 258 sb.append(element.html()); 259 } 260 return sb.toString(); 261 } 262 263 /** 264 * Get the combined outer HTML of all matched elements. 265 * @return string of all element's outer HTML. 266 * @see #text() 267 * @see #html() 268 */ 269 public String outerHtml() { 270 StringBuilder sb = new StringBuilder(); 271 for (Element element : this) { 272 if (sb.length() != 0) 273 sb.append("\n"); 274 sb.append(element.outerHtml()); 275 } 276 return sb.toString(); 277 } 278 279 /** 280 * Get the combined outer HTML of all matched elements. Alias of {@link #outerHtml()}. 281 * @return string of all element's outer HTML. 282 * @see #text() 283 * @see #html() 284 */ 285 @Override 286 public String toString() { 287 return outerHtml(); 288 } 289 290 /** 291 * Update the tag name of each matched element. For example, to change each {@code <i>} to a {@code <em>}, do 292 * {@code doc.select("i").tagName("em");} 293 * @param tagName the new tag name 294 * @return this, for chaining 295 * @see Element#tagName(String) 296 */ 297 public Elements tagName(String tagName) { 298 for (Element element : this) { 299 element.tagName(tagName); 300 } 301 return this; 302 } 303 304 /** 305 * Set the inner HTML of each matched element. 306 * @param html HTML to parse and set into each matched element. 307 * @return this, for chaining 308 * @see Element#html(String) 309 */ 310 public Elements html(String html) { 311 for (Element element : this) { 312 element.html(html); 313 } 314 return this; 315 } 316 317 /** 318 * Add the supplied HTML to the start of each matched element's inner HTML. 319 * @param html HTML to add inside each element, before the existing HTML 320 * @return this, for chaining 321 * @see Element#prepend(String) 322 */ 323 public Elements prepend(String html) { 324 for (Element element : this) { 325 element.prepend(html); 326 } 327 return this; 328 } 329 330 /** 331 * Add the supplied HTML to the end of each matched element's inner HTML. 332 * @param html HTML to add inside each element, after the existing HTML 333 * @return this, for chaining 334 * @see Element#append(String) 335 */ 336 public Elements append(String html) { 337 for (Element element : this) { 338 element.append(html); 339 } 340 return this; 341 } 342 343 /** 344 * Insert the supplied HTML before each matched element's outer HTML. 345 * @param html HTML to insert before each element 346 * @return this, for chaining 347 * @see Element#before(String) 348 */ 349 public Elements before(String html) { 350 for (Element element : this) { 351 element.before(html); 352 } 353 return this; 354 } 355 356 /** 357 * Insert the supplied HTML after each matched element's outer HTML. 358 * @param html HTML to insert after each element 359 * @return this, for chaining 360 * @see Element#after(String) 361 */ 362 public Elements after(String html) { 363 for (Element element : this) { 364 element.after(html); 365 } 366 return this; 367 } 368 369 /** 370 Wrap the supplied HTML around each matched elements. For example, with HTML 371 {@code <p><b>This</b> is <b>Jsoup</b></p>}, 372 <code>doc.select("b").wrap("<i></i>");</code> 373 becomes {@code <p><i><b>This</b></i> is <i><b>jsoup</b></i></p>} 374 @param html HTML to wrap around each element, e.g. {@code <div class="head"></div>}. Can be arbitrarily deep. 375 @return this (for chaining) 376 @see Element#wrap 377 */ 378 public Elements wrap(String html) { 379 Validate.notEmpty(html); 380 for (Element element : this) { 381 element.wrap(html); 382 } 383 return this; 384 } 385 386 /** 387 * Removes the matched elements from the DOM, and moves their children up into their parents. This has the effect of 388 * dropping the elements but keeping their children. 389 * <p> 390 * This is useful for e.g removing unwanted formatting elements but keeping their contents. 391 * </p> 392 * 393 * E.g. with HTML: <p>{@code <div><font>One</font> <font><a href="/">Two</a></font></div>}</p> 394 * <p>{@code doc.select("font").unwrap();}</p> 395 * <p>HTML = {@code <div>One <a href="/">Two</a></div>}</p> 396 * 397 * @return this (for chaining) 398 * @see Node#unwrap 399 */ 400 public Elements unwrap() { 401 for (Element element : this) { 402 element.unwrap(); 403 } 404 return this; 405 } 406 407 /** 408 * Empty (remove all child nodes from) each matched element. This is similar to setting the inner HTML of each 409 * element to nothing. 410 * <p> 411 * E.g. HTML: {@code <div><p>Hello <b>there</b></p> <p>now</p></div>}<br> 412 * <code>doc.select("p").empty();</code><br> 413 * HTML = {@code <div><p></p> <p></p></div>} 414 * @return this, for chaining 415 * @see Element#empty() 416 * @see #remove() 417 */ 418 public Elements empty() { 419 for (Element element : this) { 420 element.empty(); 421 } 422 return this; 423 } 424 425 /** 426 * Remove each matched element from the DOM. This is similar to setting the outer HTML of each element to nothing. 427 * <p> 428 * E.g. HTML: {@code <div><p>Hello</p> <p>there</p> <img /></div>}<br> 429 * <code>doc.select("p").remove();</code><br> 430 * HTML = {@code <div> <img /></div>} 431 * <p> 432 * Note that this method should not be used to clean user-submitted HTML; rather, use {@link org.jsoup.safety.Cleaner} to clean HTML. 433 * @return this, for chaining 434 * @see Element#empty() 435 * @see #empty() 436 */ 437 public Elements remove() { 438 for (Element element : this) { 439 element.remove(); 440 } 441 return this; 442 } 443 444 // filters 445 446 /** 447 * Find matching elements within this element list. 448 * @param query A {@link Selector} query 449 * @return the filtered list of elements, or an empty list if none match. 450 */ 451 public Elements select(String query) { 452 return Selector.select(query, this); 453 } 454 455 /** 456 * Remove elements from this list that match the {@link Selector} query. 457 * <p> 458 * E.g. HTML: {@code <div class=logo>One</div> <div>Two</div>}<br> 459 * <code>Elements divs = doc.select("div").not(".logo");</code><br> 460 * Result: {@code divs: [<div>Two</div>]} 461 * <p> 462 * @param query the selector query whose results should be removed from these elements 463 * @return a new elements list that contains only the filtered results 464 */ 465 public Elements not(String query) { 466 Elements out = Selector.select(query, this); 467 return Selector.filterOut(this, out); 468 } 469 470 /** 471 * Get the <i>nth</i> matched element as an Elements object. 472 * <p> 473 * See also {@link #get(int)} to retrieve an Element. 474 * @param index the (zero-based) index of the element in the list to retain 475 * @return Elements containing only the specified element, or, if that element did not exist, an empty list. 476 */ 477 public Elements eq(int index) { 478 return size() > index ? new Elements(get(index)) : new Elements(); 479 } 480 481 /** 482 * Test if any of the matched elements match the supplied query. 483 * @param query A selector 484 * @return true if at least one element in the list matches the query. 485 */ 486 public boolean is(String query) { 487 Evaluator eval = QueryParser.parse(query); 488 for (Element e : this) { 489 if (e.is(eval)) 490 return true; 491 } 492 return false; 493 } 494 495 /** 496 * Get the immediate next element sibling of each element in this list. 497 * @return next element siblings. 498 */ 499 public Elements next() { 500 return siblings(null, true, false); 501 } 502 503 /** 504 * Get the immediate next element sibling of each element in this list, filtered by the query. 505 * @param query CSS query to match siblings against 506 * @return next element siblings. 507 */ 508 public Elements next(String query) { 509 return siblings(query, true, false); 510 } 511 512 /** 513 * Get all of the following element siblings of each element in this list. 514 * @return all following element siblings. 515 */ 516 public Elements nextAll() { 517 return siblings(null, true, true); 518 } 519 520 /** 521 * Get all of the following element siblings of each element in this list, filtered by the query. 522 * @param query CSS query to match siblings against 523 * @return all following element siblings. 524 */ 525 public Elements nextAll(String query) { 526 return siblings(query, true, true); 527 } 528 529 /** 530 * Get the immediate previous element sibling of each element in this list. 531 * @return previous element siblings. 532 */ 533 public Elements prev() { 534 return siblings(null, false, false); 535 } 536 537 /** 538 * Get the immediate previous element sibling of each element in this list, filtered by the query. 539 * @param query CSS query to match siblings against 540 * @return previous element siblings. 541 */ 542 public Elements prev(String query) { 543 return siblings(query, false, false); 544 } 545 546 /** 547 * Get all of the previous element siblings of each element in this list. 548 * @return all previous element siblings. 549 */ 550 public Elements prevAll() { 551 return siblings(null, false, true); 552 } 553 554 /** 555 * Get all of the previous element siblings of each element in this list, filtered by the query. 556 * @param query CSS query to match siblings against 557 * @return all previous element siblings. 558 */ 559 public Elements prevAll(String query) { 560 return siblings(query, false, true); 561 } 562 563 private Elements siblings(String query, boolean next, boolean all) { 564 Elements els = new Elements(); 565 Evaluator eval = query != null? QueryParser.parse(query) : null; 566 for (Element e : this) { 567 do { 568 Element sib = next ? e.nextElementSibling() : e.previousElementSibling(); 569 if (sib == null) break; 570 if (eval == null) 571 els.add(sib); 572 else if (sib.is(eval)) 573 els.add(sib); 574 e = sib; 575 } while (all); 576 } 577 return els; 578 } 579 580 /** 581 * Get all of the parents and ancestor elements of the matched elements. 582 * @return all of the parents and ancestor elements of the matched elements 583 */ 584 public Elements parents() { 585 HashSet<Element> combo = new LinkedHashSet<>(); 586 for (Element e: this) { 587 combo.addAll(e.parents()); 588 } 589 return new Elements(combo); 590 } 591 592 // list-like methods 593 /** 594 Get the first matched element. 595 @return The first matched element, or <code>null</code> if contents is empty. 596 */ 597 public Element first() { 598 return isEmpty() ? null : get(0); 599 } 600 601 /** 602 Get the last matched element. 603 @return The last matched element, or <code>null</code> if contents is empty. 604 */ 605 public Element last() { 606 return isEmpty() ? null : get(size() - 1); 607 } 608 609 /** 610 * Perform a depth-first traversal on each of the selected elements. 611 * @param nodeVisitor the visitor callbacks to perform on each node 612 * @return this, for chaining 613 */ 614 public Elements traverse(NodeVisitor nodeVisitor) { 615 NodeTraversor.traverse(nodeVisitor, this); 616 return this; 617 } 618 619 /** 620 * Perform a depth-first filtering on each of the selected elements. 621 * @param nodeFilter the filter callbacks to perform on each node 622 * @return this, for chaining 623 */ 624 public Elements filter(NodeFilter nodeFilter) { 625 NodeTraversor.filter(nodeFilter, this); 626 return this; 627 } 628 629 /** 630 * Get the {@link FormElement} forms from the selected elements, if any. 631 * @return a list of {@link FormElement}s pulled from the matched elements. The list will be empty if the elements contain 632 * no forms. 633 */ 634 public List<FormElement> forms() { 635 ArrayList<FormElement> forms = new ArrayList<>(); 636 for (Element el: this) 637 if (el instanceof FormElement) 638 forms.add((FormElement) el); 639 return forms; 640 } 641 642}