package org.jsoup.nodes;

import org.jsoup.helper.ChangeNotifyingArrayList;
import org.jsoup.helper.StringUtil;
import org.jsoup.helper.Validate;
import org.jsoup.parser.ParseSettings;
import org.jsoup.parser.Parser;
import org.jsoup.parser.Tag;
import org.jsoup.select.Collector;
import org.jsoup.select.Elements;
import org.jsoup.select.Evaluator;
import org.jsoup.select.NodeTraversor;
import org.jsoup.select.NodeVisitor;
import org.jsoup.select.QueryParser;
import org.jsoup.select.Selector;

import java.io.IOException;
import java.lang.ref.WeakReference;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

import static org.jsoup.internal.Normalizer.normalize;

/**
 * An HTML element consists of a tag name, attributes, and child nodes (including text nodes and
 * other elements).
 *
 * From an Element, you can extract data, traverse the node graph, and manipulate the HTML.
 *
 * @author Jonathan Hedley, jonathan@hedley.net
 */
public class Element extends Node {
    // Shared immutable sentinel meaning "no child list allocated yet"; compared by identity in ensureChildNodes().
    private static final List<Node> EMPTY_NODES = Collections.emptyList();
    // Matches one or more whitespace characters. NOTE(review): presumably used to split the class attribute;
    // the usage is outside this part of the file.
    private static final Pattern classSplit = Pattern.compile("\\s+");
    private Tag tag;
    private WeakReference<List<Element>> shadowChildrenRef; // points to child elements shadowed from node children
    List<Node> childNodes;
    // May be null until attributes() is first called (see the two-argument constructor).
    private Attributes attributes;
    private String baseUri;

    /**
     * Create a new, standalone element with an empty base URI and an empty attribute set.
     * @param tag tag name
     */
    public Element(String tag) {
        this(Tag.valueOf(tag), "", new Attributes());
    }

    /**
     * Create a new, standalone Element. (Standalone in that it has no parent.)
*
     * @param tag tag of this element; must not be null
     * @param baseUri the base URI; must not be null
     * @param attributes initial attributes; may be null, in which case they are created on first access
     * @see #appendChild(Node)
     * @see #appendElement(String)
     */
    public Element(Tag tag, String baseUri, Attributes attributes) {
        Validate.notNull(tag);
        Validate.notNull(baseUri);
        // start with the shared empty sentinel; a real list is only allocated by ensureChildNodes()
        childNodes = EMPTY_NODES;
        this.baseUri = baseUri;
        this.attributes = attributes;
        this.tag = tag;
    }

    /**
     * Create a new Element from a tag and a base URI. No attribute set is allocated up front.
     *
     * @param tag element tag
     * @param baseUri the base URI of this element. It is acceptable for the base URI to be an empty
     *            string, but not null.
     * @see Tag#valueOf(String, ParseSettings)
     */
    public Element(Tag tag, String baseUri) {
        this(tag, baseUri, null);
    }

    /**
     * Get the child node list, replacing the shared empty sentinel with a real list on first use.
     *
     * @return this element's child node list
     */
    protected List<Node> ensureChildNodes() {
        if (childNodes == EMPTY_NODES) {
            childNodes = new NodeList(this, 4);
        }
        return childNodes;
    }

    /**
     * Test whether an attribute set has been allocated for this element.
     *
     * @return true if the attributes field is non-null
     */
    @Override
    protected boolean hasAttributes() {
        return attributes != null;
    }

    /**
     * Get this element's attributes, creating an empty set if none has been allocated yet.
     *
     * @return the attributes of this element (never null)
     */
    @Override
    public Attributes attributes() {
        if (!hasAttributes())
            attributes = new Attributes();
        return attributes;
    }

    /**
     * Get the base URI held by this element.
     *
     * @return the base URI
     */
    @Override
    public String baseUri() {
        return baseUri;
    }

    /**
     * Store a new base URI on this element.
     *
     * @param baseUri the base URI to store
     */
    @Override
    protected void doSetBaseUri(String baseUri) {
        this.baseUri = baseUri;
    }

    /**
     * Get the number of child nodes (of any node type) this element holds.
     *
     * @return the size of the child node list
     */
    @Override
    public int childNodeSize() {
        return childNodes.size();
    }

    /**
     * Get the node name of this element, which is its tag name.
     *
     * @return the tag name
     */
    @Override
    public String nodeName() {
        return tag.getName();
    }

    /**
     * Get the name of the tag for this element. E.g. {@code div}
     *
     * @return the tag name
     */
    public String tagName() {
        return tag.getName();
    }

    /**
     * Change the tag of this element. For example, convert a {@code <span>} to a {@code <div>} with
     * {@code el.tagName("div");}.
138 * 139 * @param tagName new tag name for this element 140 * @return this element, for chaining 141 */ 142 public Element tagName(String tagName) { 143 Validate.notEmpty(tagName, "Tag name must not be empty."); 144 tag = Tag.valueOf(tagName, ParseSettings.preserveCase); // preserve the requested tag case 145 return this; 146 } 147 148 /** 149 * Get the Tag for this element. 150 * 151 * @return the tag object 152 */ 153 public Tag tag() { 154 return tag; 155 } 156 157 /** 158 * Test if this element is a block-level element. (E.g. {@code <div> == true} or an inline element 159 * {@code <p> == false}). 160 * 161 * @return true if block, false if not (and thus inline) 162 */ 163 public boolean isBlock() { 164 return tag.isBlock(); 165 } 166 167 /** 168 * Get the {@code id} attribute of this element. 169 * 170 * @return The id attribute, if present, or an empty string if not. 171 */ 172 public String id() { 173 return attributes().getIgnoreCase("id"); 174 } 175 176 /** 177 * Set an attribute value on this element. If this element already has an attribute with the 178 * key, its value is updated; otherwise, a new attribute is added. 179 * 180 * @return this element 181 */ 182 public Element attr(String attributeKey, String attributeValue) { 183 super.attr(attributeKey, attributeValue); 184 return this; 185 } 186 187 /** 188 * Set a boolean attribute value on this element. Setting to <code>true</code> sets the attribute value to "" and 189 * marks the attribute as boolean so no value is written out. Setting to <code>false</code> removes the attribute 190 * with the same key if it exists. 191 * 192 * @param attributeKey the attribute key 193 * @param attributeValue the attribute value 194 * 195 * @return this element 196 */ 197 public Element attr(String attributeKey, boolean attributeValue) { 198 attributes().put(attributeKey, attributeValue); 199 return this; 200 } 201 202 /** 203 * Get this element's HTML5 custom data attributes. 
Each attribute in the element that has a key 204 * starting with "data-" is included the dataset. 205 * <p> 206 * E.g., the element {@code <div data-package="jsoup" data-language="Java" class="group">...} has the dataset 207 * {@code package=jsoup, language=java}. 208 * <p> 209 * This map is a filtered view of the element's attribute map. Changes to one map (add, remove, update) are reflected 210 * in the other map. 211 * <p> 212 * You can find elements that have data attributes using the {@code [^data-]} attribute key prefix selector. 213 * @return a map of {@code key=value} custom data attributes. 214 */ 215 public Map<String, String> dataset() { 216 return attributes().dataset(); 217 } 218 219 @Override 220 public final Element parent() { 221 return (Element) parentNode; 222 } 223 224 /** 225 * Get this element's parent and ancestors, up to the document root. 226 * @return this element's stack of parents, closest first. 227 */ 228 public Elements parents() { 229 Elements parents = new Elements(); 230 accumulateParents(this, parents); 231 return parents; 232 } 233 234 private static void accumulateParents(Element el, Elements parents) { 235 Element parent = el.parent(); 236 if (parent != null && !parent.tagName().equals("#root")) { 237 parents.add(parent); 238 accumulateParents(parent, parents); 239 } 240 } 241 242 /** 243 * Get a child element of this element, by its 0-based index number. 244 * <p> 245 * Note that an element can have both mixed Nodes and Elements as children. This method inspects 246 * a filtered list of children that are elements, and the index is based on that filtered list. 247 * </p> 248 * 249 * @param index the index number of the element to retrieve 250 * @return the child element, if it exists, otherwise throws an {@code IndexOutOfBoundsException} 251 * @see #childNode(int) 252 */ 253 public Element child(int index) { 254 return childElementsList().get(index); 255 } 256 257 /** 258 * Get this element's child elements. 
259 * <p> 260 * This is effectively a filter on {@link #childNodes()} to get Element nodes. 261 * </p> 262 * @return child elements. If this element has no children, returns an empty list. 263 * @see #childNodes() 264 */ 265 public Elements children() { 266 return new Elements(childElementsList()); 267 } 268 269 /** 270 * Maintains a shadow copy of this element's child elements. If the nodelist is changed, this cache is invalidated. 271 * TODO - think about pulling this out as a helper as there are other shadow lists (like in Attributes) kept around. 272 * @return a list of child elements 273 */ 274 private List<Element> childElementsList() { 275 List<Element> children; 276 if (shadowChildrenRef == null || (children = shadowChildrenRef.get()) == null) { 277 final int size = childNodes.size(); 278 children = new ArrayList<>(size); 279 //noinspection ForLoopReplaceableByForEach (beacause it allocates an Iterator which is wasteful here) 280 for (int i = 0; i < size; i++) { 281 final Node node = childNodes.get(i); 282 if (node instanceof Element) 283 children.add((Element) node); 284 } 285 shadowChildrenRef = new WeakReference<>(children); 286 } 287 return children; 288 } 289 290 /** 291 * Clears the cached shadow child elements. 292 */ 293 @Override 294 void nodelistChanged() { 295 super.nodelistChanged(); 296 shadowChildrenRef = null; 297 } 298 299 /** 300 * Get this element's child text nodes. The list is unmodifiable but the text nodes may be manipulated. 301 * <p> 302 * This is effectively a filter on {@link #childNodes()} to get Text nodes. 303 * @return child text nodes. If this element has no text nodes, returns an 304 * empty list. 
305 * </p> 306 * For example, with the input HTML: {@code <p>One <span>Two</span> Three <br> Four</p>} with the {@code p} element selected: 307 * <ul> 308 * <li>{@code p.text()} = {@code "One Two Three Four"}</li> 309 * <li>{@code p.ownText()} = {@code "One Three Four"}</li> 310 * <li>{@code p.children()} = {@code Elements[<span>, <br>]}</li> 311 * <li>{@code p.childNodes()} = {@code List<Node>["One ", <span>, " Three ", <br>, " Four"]}</li> 312 * <li>{@code p.textNodes()} = {@code List<TextNode>["One ", " Three ", " Four"]}</li> 313 * </ul> 314 */ 315 public List<TextNode> textNodes() { 316 List<TextNode> textNodes = new ArrayList<>(); 317 for (Node node : childNodes) { 318 if (node instanceof TextNode) 319 textNodes.add((TextNode) node); 320 } 321 return Collections.unmodifiableList(textNodes); 322 } 323 324 /** 325 * Get this element's child data nodes. The list is unmodifiable but the data nodes may be manipulated. 326 * <p> 327 * This is effectively a filter on {@link #childNodes()} to get Data nodes. 328 * </p> 329 * @return child data nodes. If this element has no data nodes, returns an 330 * empty list. 331 * @see #data() 332 */ 333 public List<DataNode> dataNodes() { 334 List<DataNode> dataNodes = new ArrayList<>(); 335 for (Node node : childNodes) { 336 if (node instanceof DataNode) 337 dataNodes.add((DataNode) node); 338 } 339 return Collections.unmodifiableList(dataNodes); 340 } 341 342 /** 343 * Find elements that match the {@link Selector} CSS query, with this element as the starting context. Matched elements 344 * may include this element, or any of its children. 
* <p>
     * This method is generally more powerful to use than the DOM-type {@code getElementBy*} methods, because
     * multiple filters can be combined, e.g.:
     * </p>
     * <ul>
     * <li>{@code el.select("a[href]")} - finds links ({@code a} tags with {@code href} attributes)
     * <li>{@code el.select("a[href*=example.com]")} - finds links pointing to example.com (loosely)
     * </ul>
     * <p>
     * See the query syntax documentation in {@link org.jsoup.select.Selector}.
     * </p>
     *
     * @param cssQuery a {@link Selector} CSS-like query
     * @return elements that match the query (empty if none match)
     * @see org.jsoup.select.Selector
     * @throws Selector.SelectorParseException (unchecked) on an invalid CSS query.
     */
    public Elements select(String cssQuery) {
        return Selector.select(cssQuery, this);
    }

    /**
     * Find the first Element that matches the {@link Selector} CSS query, with this element as the starting context.
     * <p>This is effectively the same as calling {@code element.select(query).first()}, but is more efficient as query
     * execution stops on the first hit.</p>
     * @param cssQuery a {@link Selector} CSS-like query
     * @return the first matching element, or <b>{@code null}</b> if there is no match.
     */
    public Element selectFirst(String cssQuery) {
        return Selector.selectFirst(cssQuery, this);
    }

    /**
     * Check if this element matches the given {@link Selector} CSS query. The query is parsed and then handed
     * to {@link #is(Evaluator)}.
     * @param cssQuery a {@link Selector} CSS query
     * @return if this element matches the query
     */
    public boolean is(String cssQuery) {
        return is(QueryParser.parse(cssQuery));
    }

    /**
     * Check if this element matches the given evaluator.
388 * @param evaluator an element evaluator 389 * @return if this element matches 390 */ 391 public boolean is(Evaluator evaluator) { 392 return evaluator.matches((Element)this.root(), this); 393 } 394 395 /** 396 * Add a node child node to this element. 397 * 398 * @param child node to add. 399 * @return this element, so that you can add more child nodes or elements. 400 */ 401 public Element appendChild(Node child) { 402 Validate.notNull(child); 403 404 // was - Node#addChildren(child). short-circuits an array create and a loop. 405 reparentChild(child); 406 ensureChildNodes(); 407 childNodes.add(child); 408 child.setSiblingIndex(childNodes.size() - 1); 409 return this; 410 } 411 412 /** 413 * Add this element to the supplied parent element, as its next child. 414 * 415 * @param parent element to which this element will be appended 416 * @return this element, so that you can continue modifying the element 417 */ 418 public Element appendTo(Element parent) { 419 Validate.notNull(parent); 420 parent.appendChild(this); 421 return this; 422 } 423 424 /** 425 * Add a node to the start of this element's children. 426 * 427 * @param child node to add. 428 * @return this element, so that you can add more child nodes or elements. 429 */ 430 public Element prependChild(Node child) { 431 Validate.notNull(child); 432 433 addChildren(0, child); 434 return this; 435 } 436 437 438 /** 439 * Inserts the given child nodes into this element at the specified index. Current nodes will be shifted to the 440 * right. The inserted nodes will be moved from their current parent. To prevent moving, copy the nodes first. 441 * 442 * @param index 0-based index to insert children at. Specify {@code 0} to insert at the start, {@code -1} at the 443 * end 444 * @param children child nodes to insert 445 * @return this element, for chaining. 446 */ 447 public Element insertChildren(int index, Collection<? 
extends Node> children) { 448 Validate.notNull(children, "Children collection to be inserted must not be null."); 449 int currentSize = childNodeSize(); 450 if (index < 0) index += currentSize +1; // roll around 451 Validate.isTrue(index >= 0 && index <= currentSize, "Insert position out of bounds."); 452 453 ArrayList<Node> nodes = new ArrayList<>(children); 454 Node[] nodeArray = nodes.toArray(new Node[nodes.size()]); 455 addChildren(index, nodeArray); 456 return this; 457 } 458 459 /** 460 * Inserts the given child nodes into this element at the specified index. Current nodes will be shifted to the 461 * right. The inserted nodes will be moved from their current parent. To prevent moving, copy the nodes first. 462 * 463 * @param index 0-based index to insert children at. Specify {@code 0} to insert at the start, {@code -1} at the 464 * end 465 * @param children child nodes to insert 466 * @return this element, for chaining. 467 */ 468 public Element insertChildren(int index, Node... children) { 469 Validate.notNull(children, "Children collection to be inserted must not be null."); 470 int currentSize = childNodeSize(); 471 if (index < 0) index += currentSize +1; // roll around 472 Validate.isTrue(index >= 0 && index <= currentSize, "Insert position out of bounds."); 473 474 addChildren(index, children); 475 return this; 476 } 477 478 /** 479 * Create a new element by tag name, and add it as the last child. 480 * 481 * @param tagName the name of the tag (e.g. {@code div}). 482 * @return the new element, to allow you to add content to it, e.g.: 483 * {@code parent.appendElement("h1").attr("id", "header").text("Welcome");} 484 */ 485 public Element appendElement(String tagName) { 486 Element child = new Element(Tag.valueOf(tagName), baseUri()); 487 appendChild(child); 488 return child; 489 } 490 491 /** 492 * Create a new element by tag name, and add it as the first child. 493 * 494 * @param tagName the name of the tag (e.g. {@code div}). 
495 * @return the new element, to allow you to add content to it, e.g.: 496 * {@code parent.prependElement("h1").attr("id", "header").text("Welcome");} 497 */ 498 public Element prependElement(String tagName) { 499 Element child = new Element(Tag.valueOf(tagName), baseUri()); 500 prependChild(child); 501 return child; 502 } 503 504 /** 505 * Create and append a new TextNode to this element. 506 * 507 * @param text the unencoded text to add 508 * @return this element 509 */ 510 public Element appendText(String text) { 511 Validate.notNull(text); 512 TextNode node = new TextNode(text); 513 appendChild(node); 514 return this; 515 } 516 517 /** 518 * Create and prepend a new TextNode to this element. 519 * 520 * @param text the unencoded text to add 521 * @return this element 522 */ 523 public Element prependText(String text) { 524 Validate.notNull(text); 525 TextNode node = new TextNode(text); 526 prependChild(node); 527 return this; 528 } 529 530 /** 531 * Add inner HTML to this element. The supplied HTML will be parsed, and each node appended to the end of the children. 532 * @param html HTML to add inside this element, after the existing HTML 533 * @return this element 534 * @see #html(String) 535 */ 536 public Element append(String html) { 537 Validate.notNull(html); 538 539 List<Node> nodes = Parser.parseFragment(html, this, baseUri()); 540 addChildren(nodes.toArray(new Node[nodes.size()])); 541 return this; 542 } 543 544 /** 545 * Add inner HTML into this element. The supplied HTML will be parsed, and each node prepended to the start of the element's children. 
* @param html HTML to add inside this element, before the existing HTML
     * @return this element
     * @see #html(String)
     */
    public Element prepend(String html) {
        Validate.notNull(html);

        // parse as a fragment in the context of this element, then insert everything at index 0
        List<Node> nodes = Parser.parseFragment(html, this, baseUri());
        addChildren(0, nodes.toArray(new Node[nodes.size()]));
        return this;
    }

    /**
     * Insert the specified HTML into the DOM before this element (as a preceding sibling).
     *
     * @param html HTML to add before this element
     * @return this element, for chaining
     * @see #after(String)
     */
    @Override
    public Element before(String html) {
        return (Element) super.before(html);
    }

    /**
     * Insert the specified node into the DOM before this node (as a preceding sibling).
     * @param node to add before this element
     * @return this Element, for chaining
     * @see #after(Node)
     */
    @Override
    public Element before(Node node) {
        return (Element) super.before(node);
    }

    /**
     * Insert the specified HTML into the DOM after this element (as a following sibling).
     *
     * @param html HTML to add after this element
     * @return this element, for chaining
     * @see #before(String)
     */
    @Override
    public Element after(String html) {
        return (Element) super.after(html);
    }

    /**
     * Insert the specified node into the DOM after this node (as a following sibling).
     * @param node to add after this element
     * @return this element, for chaining
     * @see #before(Node)
     */
    @Override
    public Element after(Node node) {
        return (Element) super.after(node);
    }

    /**
     * Remove all of the element's child nodes. Any attributes are left as-is.
     * @return this element
     */
    public Element empty() {
        // NOTE(review): only the child list is cleared here; whether the removed nodes still point at this
        // element as their parent afterwards is not visible from this method - confirm.
        childNodes.clear();
        return this;
    }

    /**
     * Wrap the supplied HTML around this element.
     *
     * @param html HTML to wrap around this element, e.g.
{@code <div class="head"></div>}. Can be arbitrarily deep. 617 * @return this element, for chaining. 618 */ 619 @Override 620 public Element wrap(String html) { 621 return (Element) super.wrap(html); 622 } 623 624 /** 625 * Get a CSS selector that will uniquely select this element. 626 * <p> 627 * If the element has an ID, returns #id; 628 * otherwise returns the parent (if any) CSS selector, followed by {@literal '>'}, 629 * followed by a unique selector for the element (tag.class.class:nth-child(n)). 630 * </p> 631 * 632 * @return the CSS Path that can be used to retrieve the element in a selector. 633 */ 634 public String cssSelector() { 635 if (id().length() > 0) 636 return "#" + id(); 637 638 // Translate HTML namespace ns:tag to CSS namespace syntax ns|tag 639 String tagName = tagName().replace(':', '|'); 640 StringBuilder selector = new StringBuilder(tagName); 641 String classes = StringUtil.join(classNames(), "."); 642 if (classes.length() > 0) 643 selector.append('.').append(classes); 644 645 if (parent() == null || parent() instanceof Document) // don't add Document to selector, as will always have a html node 646 return selector.toString(); 647 648 selector.insert(0, " > "); 649 if (parent().select(selector.toString()).size() > 1) 650 selector.append(String.format( 651 ":nth-child(%d)", elementSiblingIndex() + 1)); 652 653 return parent().cssSelector() + selector.toString(); 654 } 655 656 /** 657 * Get sibling elements. If the element has no sibling elements, returns an empty list. An element is not a sibling 658 * of itself, so will not be included in the returned list. 
659 * @return sibling elements 660 */ 661 public Elements siblingElements() { 662 if (parentNode == null) 663 return new Elements(0); 664 665 List<Element> elements = parent().childElementsList(); 666 Elements siblings = new Elements(elements.size() - 1); 667 for (Element el: elements) 668 if (el != this) 669 siblings.add(el); 670 return siblings; 671 } 672 673 /** 674 * Gets the next sibling element of this element. E.g., if a {@code div} contains two {@code p}s, 675 * the {@code nextElementSibling} of the first {@code p} is the second {@code p}. 676 * <p> 677 * This is similar to {@link #nextSibling()}, but specifically finds only Elements 678 * </p> 679 * @return the next element, or null if there is no next element 680 * @see #previousElementSibling() 681 */ 682 public Element nextElementSibling() { 683 if (parentNode == null) return null; 684 List<Element> siblings = parent().childElementsList(); 685 Integer index = indexInList(this, siblings); 686 Validate.notNull(index); 687 if (siblings.size() > index+1) 688 return siblings.get(index+1); 689 else 690 return null; 691 } 692 693 /** 694 * Gets the previous element sibling of this element. 695 * @return the previous element, or null if there is no previous element 696 * @see #nextElementSibling() 697 */ 698 public Element previousElementSibling() { 699 if (parentNode == null) return null; 700 List<Element> siblings = parent().childElementsList(); 701 Integer index = indexInList(this, siblings); 702 Validate.notNull(index); 703 if (index > 0) 704 return siblings.get(index-1); 705 else 706 return null; 707 } 708 709 /** 710 * Gets the first element sibling of this element. 711 * @return the first sibling that is an element (aka the parent's first element child) 712 */ 713 public Element firstElementSibling() { 714 // todo: should firstSibling() exclude this? 715 List<Element> siblings = parent().childElementsList(); 716 return siblings.size() > 1 ? 
siblings.get(0) : null; 717 } 718 719 /** 720 * Get the list index of this element in its element sibling list. I.e. if this is the first element 721 * sibling, returns 0. 722 * @return position in element sibling list 723 */ 724 public int elementSiblingIndex() { 725 if (parent() == null) return 0; 726 return indexInList(this, parent().childElementsList()); 727 } 728 729 /** 730 * Gets the last element sibling of this element 731 * @return the last sibling that is an element (aka the parent's last element child) 732 */ 733 public Element lastElementSibling() { 734 List<Element> siblings = parent().childElementsList(); 735 return siblings.size() > 1 ? siblings.get(siblings.size() - 1) : null; 736 } 737 738 private static <E extends Element> int indexInList(Element search, List<E> elements) { 739 for (int i = 0; i < elements.size(); i++) { 740 if (elements.get(i) == search) 741 return i; 742 } 743 return 0; 744 } 745 746 // DOM type methods 747 748 /** 749 * Finds elements, including and recursively under this element, with the specified tag name. 750 * @param tagName The tag name to search for (case insensitively). 751 * @return a matching unmodifiable list of elements. Will be empty if this element and none of its children match. 752 */ 753 public Elements getElementsByTag(String tagName) { 754 Validate.notEmpty(tagName); 755 tagName = normalize(tagName); 756 757 return Collector.collect(new Evaluator.Tag(tagName), this); 758 } 759 760 /** 761 * Find an element by ID, including or under this element. 762 * <p> 763 * Note that this finds the first matching ID, starting with this element. If you search down from a different 764 * starting point, it is possible to find a different element by ID. For unique element by ID within a Document, 765 * use {@link Document#getElementById(String)} 766 * @param id The ID to search for. 767 * @return The first matching element by ID, starting with this element, or null if none found. 
768 */ 769 public Element getElementById(String id) { 770 Validate.notEmpty(id); 771 772 Elements elements = Collector.collect(new Evaluator.Id(id), this); 773 if (elements.size() > 0) 774 return elements.get(0); 775 else 776 return null; 777 } 778 779 /** 780 * Find elements that have this class, including or under this element. Case insensitive. 781 * <p> 782 * Elements can have multiple classes (e.g. {@code <div class="header round first">}. This method 783 * checks each class, so you can find the above with {@code el.getElementsByClass("header");}. 784 * 785 * @param className the name of the class to search for. 786 * @return elements with the supplied class name, empty if none 787 * @see #hasClass(String) 788 * @see #classNames() 789 */ 790 public Elements getElementsByClass(String className) { 791 Validate.notEmpty(className); 792 793 return Collector.collect(new Evaluator.Class(className), this); 794 } 795 796 /** 797 * Find elements that have a named attribute set. Case insensitive. 798 * 799 * @param key name of the attribute, e.g. {@code href} 800 * @return elements that have this attribute, empty if none 801 */ 802 public Elements getElementsByAttribute(String key) { 803 Validate.notEmpty(key); 804 key = key.trim(); 805 806 return Collector.collect(new Evaluator.Attribute(key), this); 807 } 808 809 /** 810 * Find elements that have an attribute name starting with the supplied prefix. Use {@code data-} to find elements 811 * that have HTML5 datasets. 812 * @param keyPrefix name prefix of the attribute e.g. {@code data-} 813 * @return elements that have attribute names that start with with the prefix, empty if none. 814 */ 815 public Elements getElementsByAttributeStarting(String keyPrefix) { 816 Validate.notEmpty(keyPrefix); 817 keyPrefix = keyPrefix.trim(); 818 819 return Collector.collect(new Evaluator.AttributeStarting(keyPrefix), this); 820 } 821 822 /** 823 * Find elements that have an attribute with the specific value. Case insensitive. 
*
     * @param key name of the attribute
     * @param value value of the attribute
     * @return elements that have this attribute with this value, empty if none
     */
    public Elements getElementsByAttributeValue(String key, String value) {
        return Collector.collect(new Evaluator.AttributeWithValue(key, value), this);
    }

    /**
     * Find elements that either do not have this attribute, or have it with a different value. Case insensitive.
     *
     * @param key name of the attribute
     * @param value value of the attribute
     * @return elements that do not have a matching attribute
     */
    public Elements getElementsByAttributeValueNot(String key, String value) {
        return Collector.collect(new Evaluator.AttributeWithValueNot(key, value), this);
    }

    /**
     * Find elements that have attributes that start with the value prefix. Case insensitive.
     *
     * @param key name of the attribute
     * @param valuePrefix start of attribute value
     * @return elements that have attributes that start with the value prefix
     */
    public Elements getElementsByAttributeValueStarting(String key, String valuePrefix) {
        return Collector.collect(new Evaluator.AttributeWithValueStarting(key, valuePrefix), this);
    }

    /**
     * Find elements that have attributes that end with the value suffix. Case insensitive.
     *
     * @param key name of the attribute
     * @param valueSuffix end of the attribute value
     * @return elements that have attributes that end with the value suffix
     */
    public Elements getElementsByAttributeValueEnding(String key, String valueSuffix) {
        return Collector.collect(new Evaluator.AttributeWithValueEnding(key, valueSuffix), this);
    }

    /**
     * Find elements that have attributes whose value contains the match string. Case insensitive.
*
     * @param key name of the attribute
     * @param match substring of value to search for
     * @return elements that have attributes containing this text
     */
    public Elements getElementsByAttributeValueContaining(String key, String match) {
        return Collector.collect(new Evaluator.AttributeWithValueContaining(key, match), this);
    }

    /**
     * Find elements that have attributes whose values match the supplied regular expression.
     * @param key name of the attribute
     * @param pattern compiled regular expression to match against attribute values
     * @return elements that have attributes matching this regular expression
     */
    public Elements getElementsByAttributeValueMatching(String key, Pattern pattern) {
        return Collector.collect(new Evaluator.AttributeWithValueMatching(key, pattern), this);

    }

    /**
     * Find elements that have attributes whose values match the supplied regular expression.
     * @param key name of the attribute
     * @param regex regular expression to match against attribute values. You can use <a href="http://java.sun.com/docs/books/tutorial/essential/regex/pattern.html#embedded">embedded flags</a> (such as (?i) and (?m)) to control regex options.
     * @return elements that have attributes matching this regular expression
     * @throws IllegalArgumentException if {@code regex} is not a valid regular expression
     */
    public Elements getElementsByAttributeValueMatching(String key, String regex) {
        Pattern pattern;
        try {
            pattern = Pattern.compile(regex);
        } catch (PatternSyntaxException e) {
            // rethrow as an argument error, keeping the original exception as the cause
            throw new IllegalArgumentException("Pattern syntax error: " + regex, e);
        }
        return getElementsByAttributeValueMatching(key, pattern);
    }

    /**
     * Find elements whose sibling index is less than the supplied index.
* @param index 0-based index
     * @return elements less than index
     */
    public Elements getElementsByIndexLessThan(int index) {
        return Collector.collect(new Evaluator.IndexLessThan(index), this);
    }

    /**
     * Find elements whose sibling index is greater than the supplied index.
     * @param index 0-based index
     * @return elements greater than index
     */
    public Elements getElementsByIndexGreaterThan(int index) {
        return Collector.collect(new Evaluator.IndexGreaterThan(index), this);
    }

    /**
     * Find elements whose sibling index is equal to the supplied index.
     * @param index 0-based index
     * @return elements equal to index
     */
    public Elements getElementsByIndexEquals(int index) {
        return Collector.collect(new Evaluator.IndexEquals(index), this);
    }

    /**
     * Find elements that contain the specified string. The search is case insensitive. The text may appear directly
     * in the element, or in any of its descendants.
     * @param searchText to look for in the element's text
     * @return elements that contain the string, case insensitive.
     * @see Element#text()
     */
    public Elements getElementsContainingText(String searchText) {
        return Collector.collect(new Evaluator.ContainsText(searchText), this);
    }

    /**
     * Find elements that directly contain the specified string. The search is case insensitive. The text must appear directly
     * in the element, not in any of its descendants.
     * @param searchText to look for in the element's own text
     * @return elements that contain the string, case insensitive.
     * @see Element#ownText()
     */
    public Elements getElementsContainingOwnText(String searchText) {
        return Collector.collect(new Evaluator.ContainsOwnText(searchText), this);
    }

    /**
     * Find elements whose text matches the supplied regular expression.
955 * @param pattern regular expression to match text against 956 * @return elements matching the supplied regular expression. 957 * @see Element#text() 958 */ 959 public Elements getElementsMatchingText(Pattern pattern) { 960 return Collector.collect(new Evaluator.Matches(pattern), this); 961 } 962 963 /** 964 * Find elements whose text matches the supplied regular expression. 965 * @param regex regular expression to match text against. You can use <a href="http://java.sun.com/docs/books/tutorial/essential/regex/pattern.html#embedded">embedded flags</a> (such as (?i) and (?m) to control regex options. 966 * @return elements matching the supplied regular expression. 967 * @see Element#text() 968 */ 969 public Elements getElementsMatchingText(String regex) { 970 Pattern pattern; 971 try { 972 pattern = Pattern.compile(regex); 973 } catch (PatternSyntaxException e) { 974 throw new IllegalArgumentException("Pattern syntax error: " + regex, e); 975 } 976 return getElementsMatchingText(pattern); 977 } 978 979 /** 980 * Find elements whose own text matches the supplied regular expression. 981 * @param pattern regular expression to match text against 982 * @return elements matching the supplied regular expression. 983 * @see Element#ownText() 984 */ 985 public Elements getElementsMatchingOwnText(Pattern pattern) { 986 return Collector.collect(new Evaluator.MatchesOwn(pattern), this); 987 } 988 989 /** 990 * Find elements whose text matches the supplied regular expression. 991 * @param regex regular expression to match text against. You can use <a href="http://java.sun.com/docs/books/tutorial/essential/regex/pattern.html#embedded">embedded flags</a> (such as (?i) and (?m) to control regex options. 992 * @return elements matching the supplied regular expression. 
993 * @see Element#ownText() 994 */ 995 public Elements getElementsMatchingOwnText(String regex) { 996 Pattern pattern; 997 try { 998 pattern = Pattern.compile(regex); 999 } catch (PatternSyntaxException e) { 1000 throw new IllegalArgumentException("Pattern syntax error: " + regex, e); 1001 } 1002 return getElementsMatchingOwnText(pattern); 1003 } 1004 1005 /** 1006 * Find all elements under this element (including self, and children of children). 1007 * 1008 * @return all elements 1009 */ 1010 public Elements getAllElements() { 1011 return Collector.collect(new Evaluator.AllElements(), this); 1012 } 1013 1014 /** 1015 * Gets the combined text of this element and all its children. Whitespace is normalized and trimmed. 1016 * <p> 1017 * For example, given HTML {@code <p>Hello <b>there</b> now! </p>}, {@code p.text()} returns {@code "Hello there now!"} 1018 * 1019 * @return unencoded text, or empty string if none. 1020 * @see #ownText() 1021 * @see #textNodes() 1022 */ 1023 public String text() { 1024 final StringBuilder accum = new StringBuilder(); 1025 NodeTraversor.traverse(new NodeVisitor() { 1026 public void head(Node node, int depth) { 1027 if (node instanceof TextNode) { 1028 TextNode textNode = (TextNode) node; 1029 appendNormalisedText(accum, textNode); 1030 } else if (node instanceof Element) { 1031 Element element = (Element) node; 1032 if (accum.length() > 0 && 1033 (element.isBlock() || element.tag.getName().equals("br")) && 1034 !TextNode.lastCharIsWhitespace(accum)) 1035 accum.append(' '); 1036 } 1037 } 1038 1039 public void tail(Node node, int depth) { 1040 } 1041 }, this); 1042 return accum.toString().trim(); 1043 } 1044 1045 /** 1046 * Gets the text owned by this element only; does not get the combined text of all children. 1047 * <p> 1048 * For example, given HTML {@code <p>Hello <b>there</b> now!</p>}, {@code p.ownText()} returns {@code "Hello now!"}, 1049 * whereas {@code p.text()} returns {@code "Hello there now!"}. 
1050 * Note that the text within the {@code b} element is not returned, as it is not a direct child of the {@code p} element. 1051 * 1052 * @return unencoded text, or empty string if none. 1053 * @see #text() 1054 * @see #textNodes() 1055 */ 1056 public String ownText() { 1057 StringBuilder sb = new StringBuilder(); 1058 ownText(sb); 1059 return sb.toString().trim(); 1060 } 1061 1062 private void ownText(StringBuilder accum) { 1063 for (Node child : childNodes) { 1064 if (child instanceof TextNode) { 1065 TextNode textNode = (TextNode) child; 1066 appendNormalisedText(accum, textNode); 1067 } else if (child instanceof Element) { 1068 appendWhitespaceIfBr((Element) child, accum); 1069 } 1070 } 1071 } 1072 1073 private static void appendNormalisedText(StringBuilder accum, TextNode textNode) { 1074 String text = textNode.getWholeText(); 1075 1076 if (preserveWhitespace(textNode.parentNode)) 1077 accum.append(text); 1078 else 1079 StringUtil.appendNormalisedWhitespace(accum, text, TextNode.lastCharIsWhitespace(accum)); 1080 } 1081 1082 private static void appendWhitespaceIfBr(Element element, StringBuilder accum) { 1083 if (element.tag.getName().equals("br") && !TextNode.lastCharIsWhitespace(accum)) 1084 accum.append(" "); 1085 } 1086 1087 static boolean preserveWhitespace(Node node) { 1088 // looks only at this element and one level up, to prevent recursion & needless stack searches 1089 if (node != null && node instanceof Element) { 1090 Element element = (Element) node; 1091 return element.tag.preserveWhitespace() || 1092 element.parent() != null && element.parent().tag.preserveWhitespace(); 1093 } 1094 return false; 1095 } 1096 1097 /** 1098 * Set the text of this element. 
Any existing contents (text or elements) will be cleared 1099 * @param text unencoded text 1100 * @return this element 1101 */ 1102 public Element text(String text) { 1103 Validate.notNull(text); 1104 1105 empty(); 1106 TextNode textNode = new TextNode(text); 1107 appendChild(textNode); 1108 1109 return this; 1110 } 1111 1112 /** 1113 Test if this element has any text content (that is not just whitespace). 1114 @return true if element has non-blank text content. 1115 */ 1116 public boolean hasText() { 1117 for (Node child: childNodes) { 1118 if (child instanceof TextNode) { 1119 TextNode textNode = (TextNode) child; 1120 if (!textNode.isBlank()) 1121 return true; 1122 } else if (child instanceof Element) { 1123 Element el = (Element) child; 1124 if (el.hasText()) 1125 return true; 1126 } 1127 } 1128 return false; 1129 } 1130 1131 /** 1132 * Get the combined data of this element. Data is e.g. the inside of a {@code script} tag. Note that data is NOT the 1133 * text of the element. Use {@link #text()} to get the text that would be visible to a user, and {@link #data()} 1134 * for the contents of scripts, comments, CSS styles, etc. 1135 * 1136 * @return the data, or empty string if none 1137 * 1138 * @see #dataNodes() 1139 */ 1140 public String data() { 1141 StringBuilder sb = new StringBuilder(); 1142 1143 for (Node childNode : childNodes) { 1144 if (childNode instanceof DataNode) { 1145 DataNode data = (DataNode) childNode; 1146 sb.append(data.getWholeData()); 1147 } else if (childNode instanceof Comment) { 1148 Comment comment = (Comment) childNode; 1149 sb.append(comment.getData()); 1150 } else if (childNode instanceof Element) { 1151 Element element = (Element) childNode; 1152 String elementData = element.data(); 1153 sb.append(elementData); 1154 } 1155 } 1156 return sb.toString(); 1157 } 1158 1159 /** 1160 * Gets the literal value of this element's "class" attribute, which may include multiple class names, space 1161 * separated. (E.g. 
on <code><div class="header gray"></code> returns, "<code>header gray</code>") 1162 * @return The literal class attribute, or <b>empty string</b> if no class attribute set. 1163 */ 1164 public String className() { 1165 return attr("class").trim(); 1166 } 1167 1168 /** 1169 * Get all of the element's class names. E.g. on element {@code <div class="header gray">}, 1170 * returns a set of two elements {@code "header", "gray"}. Note that modifications to this set are not pushed to 1171 * the backing {@code class} attribute; use the {@link #classNames(java.util.Set)} method to persist them. 1172 * @return set of classnames, empty if no class attribute 1173 */ 1174 public Set<String> classNames() { 1175 String[] names = classSplit.split(className()); 1176 Set<String> classNames = new LinkedHashSet<>(Arrays.asList(names)); 1177 classNames.remove(""); // if classNames() was empty, would include an empty class 1178 1179 return classNames; 1180 } 1181 1182 /** 1183 Set the element's {@code class} attribute to the supplied class names. 1184 @param classNames set of classes 1185 @return this element, for chaining 1186 */ 1187 public Element classNames(Set<String> classNames) { 1188 Validate.notNull(classNames); 1189 attributes().put("class", StringUtil.join(classNames, " ")); 1190 return this; 1191 } 1192 1193 /** 1194 * Tests if this element has a class. Case insensitive. 
1195 * @param className name of class to check for 1196 * @return true if it does, false if not 1197 */ 1198 // performance sensitive 1199 public boolean hasClass(String className) { 1200 final String classAttr = attributes().getIgnoreCase("class"); 1201 final int len = classAttr.length(); 1202 final int wantLen = className.length(); 1203 1204 if (len == 0 || len < wantLen) { 1205 return false; 1206 } 1207 1208 // if both lengths are equal, only need compare the className with the attribute 1209 if (len == wantLen) { 1210 return className.equalsIgnoreCase(classAttr); 1211 } 1212 1213 // otherwise, scan for whitespace and compare regions (with no string or arraylist allocations) 1214 boolean inClass = false; 1215 int start = 0; 1216 for (int i = 0; i < len; i++) { 1217 if (Character.isWhitespace(classAttr.charAt(i))) { 1218 if (inClass) { 1219 // white space ends a class name, compare it with the requested one, ignore case 1220 if (i - start == wantLen && classAttr.regionMatches(true, start, className, 0, wantLen)) { 1221 return true; 1222 } 1223 inClass = false; 1224 } 1225 } else { 1226 if (!inClass) { 1227 // we're in a class name : keep the start of the substring 1228 inClass = true; 1229 start = i; 1230 } 1231 } 1232 } 1233 1234 // check the last entry 1235 if (inClass && len - start == wantLen) { 1236 return classAttr.regionMatches(true, start, className, 0, wantLen); 1237 } 1238 1239 return false; 1240 } 1241 1242 /** 1243 Add a class name to this element's {@code class} attribute. 1244 @param className class name to add 1245 @return this element 1246 */ 1247 public Element addClass(String className) { 1248 Validate.notNull(className); 1249 1250 Set<String> classes = classNames(); 1251 classes.add(className); 1252 classNames(classes); 1253 1254 return this; 1255 } 1256 1257 /** 1258 Remove a class name from this element's {@code class} attribute. 
1259 @param className class name to remove 1260 @return this element 1261 */ 1262 public Element removeClass(String className) { 1263 Validate.notNull(className); 1264 1265 Set<String> classes = classNames(); 1266 classes.remove(className); 1267 classNames(classes); 1268 1269 return this; 1270 } 1271 1272 /** 1273 Toggle a class name on this element's {@code class} attribute: if present, remove it; otherwise add it. 1274 @param className class name to toggle 1275 @return this element 1276 */ 1277 public Element toggleClass(String className) { 1278 Validate.notNull(className); 1279 1280 Set<String> classes = classNames(); 1281 if (classes.contains(className)) 1282 classes.remove(className); 1283 else 1284 classes.add(className); 1285 classNames(classes); 1286 1287 return this; 1288 } 1289 1290 /** 1291 * Get the value of a form element (input, textarea, etc). 1292 * @return the value of the form element, or empty string if not set. 1293 */ 1294 public String val() { 1295 if (tagName().equals("textarea")) 1296 return text(); 1297 else 1298 return attr("value"); 1299 } 1300 1301 /** 1302 * Set the value of a form element (input, textarea, etc). 
1303 * @param value value to set 1304 * @return this element (for chaining) 1305 */ 1306 public Element val(String value) { 1307 if (tagName().equals("textarea")) 1308 text(value); 1309 else 1310 attr("value", value); 1311 return this; 1312 } 1313 1314 void outerHtmlHead(final Appendable accum, int depth, final Document.OutputSettings out) throws IOException { 1315 if (out.prettyPrint() && (tag.formatAsBlock() || (parent() != null && parent().tag().formatAsBlock()) || out.outline())) { 1316 if (accum instanceof StringBuilder) { 1317 if (((StringBuilder) accum).length() > 0) 1318 indent(accum, depth, out); 1319 } else { 1320 indent(accum, depth, out); 1321 } 1322 } 1323 accum.append('<').append(tagName()); 1324 if (attributes != null) attributes.html(accum, out); 1325 1326 // selfclosing includes unknown tags, isEmpty defines tags that are always empty 1327 if (childNodes.isEmpty() && tag.isSelfClosing()) { 1328 if (out.syntax() == Document.OutputSettings.Syntax.html && tag.isEmpty()) 1329 accum.append('>'); 1330 else 1331 accum.append(" />"); // <img> in html, <img /> in xml 1332 } 1333 else 1334 accum.append('>'); 1335 } 1336 1337 void outerHtmlTail(Appendable accum, int depth, Document.OutputSettings out) throws IOException { 1338 if (!(childNodes.isEmpty() && tag.isSelfClosing())) { 1339 if (out.prettyPrint() && (!childNodes.isEmpty() && ( 1340 tag.formatAsBlock() || (out.outline() && (childNodes.size()>1 || (childNodes.size()==1 && !(childNodes.get(0) instanceof TextNode)))) 1341 ))) 1342 indent(accum, depth, out); 1343 accum.append("</").append(tagName()).append('>'); 1344 } 1345 } 1346 1347 /** 1348 * Retrieves the element's inner HTML. E.g. on a {@code <div>} with one empty {@code <p>}, would return 1349 * {@code <p></p>}. (Whereas {@link #outerHtml()} would return {@code <div><p></p></div>}.) 1350 * 1351 * @return String of HTML. 
1352 * @see #outerHtml() 1353 */ 1354 public String html() { 1355 StringBuilder accum = StringUtil.stringBuilder(); 1356 html(accum); 1357 return getOutputSettings().prettyPrint() ? accum.toString().trim() : accum.toString(); 1358 } 1359 1360 private void html(StringBuilder accum) { 1361 for (Node node : childNodes) 1362 node.outerHtml(accum); 1363 } 1364 1365 /** 1366 * {@inheritDoc} 1367 */ 1368 @Override 1369 public <T extends Appendable> T html(T appendable) { 1370 for (Node node : childNodes) 1371 node.outerHtml(appendable); 1372 1373 return appendable; 1374 } 1375 1376 /** 1377 * Set this element's inner HTML. Clears the existing HTML first. 1378 * @param html HTML to parse and set into this element 1379 * @return this element 1380 * @see #append(String) 1381 */ 1382 public Element html(String html) { 1383 empty(); 1384 append(html); 1385 return this; 1386 } 1387 1388 public String toString() { 1389 return outerHtml(); 1390 } 1391 1392 @Override 1393 public Element clone() { 1394 return (Element) super.clone(); 1395 } 1396 1397 @Override 1398 protected Element doClone(Node parent) { 1399 Element clone = (Element) super.doClone(parent); 1400 clone.attributes = attributes != null ? attributes.clone() : null; 1401 clone.baseUri = baseUri; 1402 clone.childNodes = new NodeList(clone, childNodes.size()); 1403 clone.childNodes.addAll(childNodes); 1404 1405 return clone; 1406 } 1407 1408 private static final class NodeList extends ChangeNotifyingArrayList<Node> { 1409 private final Element owner; 1410 1411 NodeList(Element owner, int initialCapacity) { 1412 super(initialCapacity); 1413 this.owner = owner; 1414 } 1415 1416 public void onContentsChanged() { 1417 owner.nodelistChanged(); 1418 } 1419 } 1420}