001package org.jsoup.nodes;
002
003import org.jsoup.helper.ChangeNotifyingArrayList;
004import org.jsoup.helper.StringUtil;
005import org.jsoup.helper.Validate;
006import org.jsoup.parser.ParseSettings;
007import org.jsoup.parser.Parser;
008import org.jsoup.parser.Tag;
009import org.jsoup.select.Collector;
010import org.jsoup.select.Elements;
011import org.jsoup.select.Evaluator;
012import org.jsoup.select.NodeTraversor;
013import org.jsoup.select.NodeVisitor;
014import org.jsoup.select.QueryParser;
015import org.jsoup.select.Selector;
016
017import java.io.IOException;
018import java.lang.ref.WeakReference;
019import java.util.ArrayList;
020import java.util.Arrays;
021import java.util.Collection;
022import java.util.Collections;
023import java.util.LinkedHashSet;
024import java.util.List;
025import java.util.Map;
026import java.util.Set;
027import java.util.regex.Pattern;
028import java.util.regex.PatternSyntaxException;
029
030import static org.jsoup.internal.Normalizer.normalize;
031
032/**
 * An HTML element consists of a tag name, attributes, and child nodes (including text nodes and
 * other elements).
035 * 
036 * From an Element, you can extract data, traverse the node graph, and manipulate the HTML.
037 * 
038 * @author Jonathan Hedley, jonathan@hedley.net
039 */
040public class Element extends Node {
041    private static final List<Node> EMPTY_NODES = Collections.emptyList();
042    private static final Pattern classSplit = Pattern.compile("\\s+");
043    private Tag tag;
044    private WeakReference<List<Element>> shadowChildrenRef; // points to child elements shadowed from node children
045    List<Node> childNodes;
046    private Attributes attributes;
047    private String baseUri;
048
049    /**
050     * Create a new, standalone element.
051     * @param tag tag name
052     */
053    public Element(String tag) {
054        this(Tag.valueOf(tag), "", new Attributes());
055    }
056
057    /**
058     * Create a new, standalone Element. (Standalone in that is has no parent.)
059     * 
060     * @param tag tag of this element
061     * @param baseUri the base URI
062     * @param attributes initial attributes
063     * @see #appendChild(Node)
064     * @see #appendElement(String)
065     */
066    public Element(Tag tag, String baseUri, Attributes attributes) {
067        Validate.notNull(tag);
068        Validate.notNull(baseUri);
069        childNodes = EMPTY_NODES;
070        this.baseUri = baseUri;
071        this.attributes = attributes;
072        this.tag = tag;
073    }
074    
075    /**
076     * Create a new Element from a tag and a base URI.
077     * 
078     * @param tag element tag
079     * @param baseUri the base URI of this element. It is acceptable for the base URI to be an empty
080     *            string, but not null.
081     * @see Tag#valueOf(String, ParseSettings)
082     */
083    public Element(Tag tag, String baseUri) {
084        this(tag, baseUri, null);
085    }
086
087    protected List<Node> ensureChildNodes() {
088        if (childNodes == EMPTY_NODES) {
089            childNodes = new NodeList(this, 4);
090        }
091        return childNodes;
092    }
093
094    @Override
095    protected boolean hasAttributes() {
096        return attributes != null;
097    }
098
099    @Override
100    public Attributes attributes() {
101        if (!hasAttributes())
102            attributes = new Attributes();
103        return attributes;
104    }
105
106    @Override
107    public String baseUri() {
108        return baseUri;
109    }
110
111    @Override
112    protected void doSetBaseUri(String baseUri) {
113        this.baseUri = baseUri;
114    }
115
116    @Override
117    public int childNodeSize() {
118        return childNodes.size();
119    }
120
121    @Override
122    public String nodeName() {
123        return tag.getName();
124    }
125
126    /**
127     * Get the name of the tag for this element. E.g. {@code div}
128     * 
129     * @return the tag name
130     */
131    public String tagName() {
132        return tag.getName();
133    }
134
135    /**
136     * Change the tag of this element. For example, convert a {@code <span>} to a {@code <div>} with
137     * {@code el.tagName("div");}.
138     *
139     * @param tagName new tag name for this element
140     * @return this element, for chaining
141     */
142    public Element tagName(String tagName) {
143        Validate.notEmpty(tagName, "Tag name must not be empty.");
144        tag = Tag.valueOf(tagName, ParseSettings.preserveCase); // preserve the requested tag case
145        return this;
146    }
147
148    /**
149     * Get the Tag for this element.
150     * 
151     * @return the tag object
152     */
153    public Tag tag() {
154        return tag;
155    }
156    
157    /**
158     * Test if this element is a block-level element. (E.g. {@code <div> == true} or an inline element
159     * {@code <p> == false}).
160     * 
161     * @return true if block, false if not (and thus inline)
162     */
163    public boolean isBlock() {
164        return tag.isBlock();
165    }
166
167    /**
168     * Get the {@code id} attribute of this element.
169     * 
170     * @return The id attribute, if present, or an empty string if not.
171     */
172    public String id() {
173        return attributes().getIgnoreCase("id");
174    }
175
176    /**
177     * Set an attribute value on this element. If this element already has an attribute with the
178     * key, its value is updated; otherwise, a new attribute is added.
179     * 
180     * @return this element
181     */
182    public Element attr(String attributeKey, String attributeValue) {
183        super.attr(attributeKey, attributeValue);
184        return this;
185    }
186    
187    /**
188     * Set a boolean attribute value on this element. Setting to <code>true</code> sets the attribute value to "" and
189     * marks the attribute as boolean so no value is written out. Setting to <code>false</code> removes the attribute
190     * with the same key if it exists.
191     * 
192     * @param attributeKey the attribute key
193     * @param attributeValue the attribute value
194     * 
195     * @return this element
196     */
197    public Element attr(String attributeKey, boolean attributeValue) {
198        attributes().put(attributeKey, attributeValue);
199        return this;
200    }
201
202    /**
203     * Get this element's HTML5 custom data attributes. Each attribute in the element that has a key
204     * starting with "data-" is included the dataset.
205     * <p>
206     * E.g., the element {@code <div data-package="jsoup" data-language="Java" class="group">...} has the dataset
207     * {@code package=jsoup, language=java}.
208     * <p>
209     * This map is a filtered view of the element's attribute map. Changes to one map (add, remove, update) are reflected
210     * in the other map.
211     * <p>
212     * You can find elements that have data attributes using the {@code [^data-]} attribute key prefix selector.
213     * @return a map of {@code key=value} custom data attributes.
214     */
215    public Map<String, String> dataset() {
216        return attributes().dataset();
217    }
218
219    @Override
220    public final Element parent() {
221        return (Element) parentNode;
222    }
223
224    /**
225     * Get this element's parent and ancestors, up to the document root.
226     * @return this element's stack of parents, closest first.
227     */
228    public Elements parents() {
229        Elements parents = new Elements();
230        accumulateParents(this, parents);
231        return parents;
232    }
233
234    private static void accumulateParents(Element el, Elements parents) {
235        Element parent = el.parent();
236        if (parent != null && !parent.tagName().equals("#root")) {
237            parents.add(parent);
238            accumulateParents(parent, parents);
239        }
240    }
241
242    /**
243     * Get a child element of this element, by its 0-based index number.
244     * <p>
245     * Note that an element can have both mixed Nodes and Elements as children. This method inspects
246     * a filtered list of children that are elements, and the index is based on that filtered list.
247     * </p>
248     * 
249     * @param index the index number of the element to retrieve
250     * @return the child element, if it exists, otherwise throws an {@code IndexOutOfBoundsException}
251     * @see #childNode(int)
252     */
253    public Element child(int index) {
254        return childElementsList().get(index);
255    }
256
257    /**
258     * Get this element's child elements.
259     * <p>
260     * This is effectively a filter on {@link #childNodes()} to get Element nodes.
261     * </p>
262     * @return child elements. If this element has no children, returns an empty list.
263     * @see #childNodes()
264     */
265    public Elements children() {
266        return new Elements(childElementsList());
267    }
268
269    /**
270     * Maintains a shadow copy of this element's child elements. If the nodelist is changed, this cache is invalidated.
271     * TODO - think about pulling this out as a helper as there are other shadow lists (like in Attributes) kept around.
272     * @return a list of child elements
273     */
274    private List<Element> childElementsList() {
275        List<Element> children;
276        if (shadowChildrenRef == null || (children = shadowChildrenRef.get()) == null) {
277            final int size = childNodes.size();
278            children = new ArrayList<>(size);
279            //noinspection ForLoopReplaceableByForEach (beacause it allocates an Iterator which is wasteful here)
280            for (int i = 0; i < size; i++) {
281                final Node node = childNodes.get(i);
282                if (node instanceof Element)
283                    children.add((Element) node);
284            }
285            shadowChildrenRef = new WeakReference<>(children);
286        }
287        return children;
288    }
289
290    /**
291     * Clears the cached shadow child elements.
292     */
293    @Override
294    void nodelistChanged() {
295        super.nodelistChanged();
296        shadowChildrenRef = null;
297    }
298
299    /**
300     * Get this element's child text nodes. The list is unmodifiable but the text nodes may be manipulated.
301     * <p>
302     * This is effectively a filter on {@link #childNodes()} to get Text nodes.
303     * @return child text nodes. If this element has no text nodes, returns an
304     * empty list.
305     * </p>
306     * For example, with the input HTML: {@code <p>One <span>Two</span> Three <br> Four</p>} with the {@code p} element selected:
307     * <ul>
308     *     <li>{@code p.text()} = {@code "One Two Three Four"}</li>
309     *     <li>{@code p.ownText()} = {@code "One Three Four"}</li>
310     *     <li>{@code p.children()} = {@code Elements[<span>, <br>]}</li>
311     *     <li>{@code p.childNodes()} = {@code List<Node>["One ", <span>, " Three ", <br>, " Four"]}</li>
312     *     <li>{@code p.textNodes()} = {@code List<TextNode>["One ", " Three ", " Four"]}</li>
313     * </ul>
314     */
315    public List<TextNode> textNodes() {
316        List<TextNode> textNodes = new ArrayList<>();
317        for (Node node : childNodes) {
318            if (node instanceof TextNode)
319                textNodes.add((TextNode) node);
320        }
321        return Collections.unmodifiableList(textNodes);
322    }
323
324    /**
325     * Get this element's child data nodes. The list is unmodifiable but the data nodes may be manipulated.
326     * <p>
327     * This is effectively a filter on {@link #childNodes()} to get Data nodes.
328     * </p>
329     * @return child data nodes. If this element has no data nodes, returns an
330     * empty list.
331     * @see #data()
332     */
333    public List<DataNode> dataNodes() {
334        List<DataNode> dataNodes = new ArrayList<>();
335        for (Node node : childNodes) {
336            if (node instanceof DataNode)
337                dataNodes.add((DataNode) node);
338        }
339        return Collections.unmodifiableList(dataNodes);
340    }
341
342    /**
343     * Find elements that match the {@link Selector} CSS query, with this element as the starting context. Matched elements
344     * may include this element, or any of its children.
345     * <p>
346     * This method is generally more powerful to use than the DOM-type {@code getElementBy*} methods, because
347     * multiple filters can be combined, e.g.:
348     * </p>
349     * <ul>
350     * <li>{@code el.select("a[href]")} - finds links ({@code a} tags with {@code href} attributes)
351     * <li>{@code el.select("a[href*=example.com]")} - finds links pointing to example.com (loosely)
352     * </ul>
353     * <p>
354     * See the query syntax documentation in {@link org.jsoup.select.Selector}.
355     * </p>
356     * 
357     * @param cssQuery a {@link Selector} CSS-like query
358     * @return elements that match the query (empty if none match)
359     * @see org.jsoup.select.Selector
360     * @throws Selector.SelectorParseException (unchecked) on an invalid CSS query.
361     */
362    public Elements select(String cssQuery) {
363        return Selector.select(cssQuery, this);
364    }
365
366    /**
367     * Find the first Element that matches the {@link Selector} CSS query, with this element as the starting context.
368     * <p>This is effectively the same as calling {@code element.select(query).first()}, but is more efficient as query
369     * execution stops on the first hit.</p>
370     * @param cssQuery cssQuery a {@link Selector} CSS-like query
371     * @return the first matching element, or <b>{@code null}</b> if there is no match.
372     */
373    public Element selectFirst(String cssQuery) {
374        return Selector.selectFirst(cssQuery, this);
375    }
376
377    /**
378     * Check if this element matches the given {@link Selector} CSS query.
379     * @param cssQuery a {@link Selector} CSS query
380     * @return if this element matches the query
381     */
382    public boolean is(String cssQuery) {
383        return is(QueryParser.parse(cssQuery));
384    }
385
386    /**
387     * Check if this element matches the given evaluator.
388     * @param evaluator an element evaluator
389     * @return if this element matches
390     */
391    public boolean is(Evaluator evaluator) {
392        return evaluator.matches((Element)this.root(), this);
393    }
394    
395    /**
396     * Add a node child node to this element.
397     * 
398     * @param child node to add.
399     * @return this element, so that you can add more child nodes or elements.
400     */
401    public Element appendChild(Node child) {
402        Validate.notNull(child);
403
404        // was - Node#addChildren(child). short-circuits an array create and a loop.
405        reparentChild(child);
406        ensureChildNodes();
407        childNodes.add(child);
408        child.setSiblingIndex(childNodes.size() - 1);
409        return this;
410    }
411
412    /**
413     * Add this element to the supplied parent element, as its next child.
414     *
415     * @param parent element to which this element will be appended
416     * @return this element, so that you can continue modifying the element
417     */
418    public Element appendTo(Element parent) {
419        Validate.notNull(parent);
420        parent.appendChild(this);
421        return this;
422    }
423
424    /**
425     * Add a node to the start of this element's children.
426     * 
427     * @param child node to add.
428     * @return this element, so that you can add more child nodes or elements.
429     */
430    public Element prependChild(Node child) {
431        Validate.notNull(child);
432        
433        addChildren(0, child);
434        return this;
435    }
436
437
438    /**
439     * Inserts the given child nodes into this element at the specified index. Current nodes will be shifted to the
440     * right. The inserted nodes will be moved from their current parent. To prevent moving, copy the nodes first.
441     *
442     * @param index 0-based index to insert children at. Specify {@code 0} to insert at the start, {@code -1} at the
443     * end
444     * @param children child nodes to insert
445     * @return this element, for chaining.
446     */
447    public Element insertChildren(int index, Collection<? extends Node> children) {
448        Validate.notNull(children, "Children collection to be inserted must not be null.");
449        int currentSize = childNodeSize();
450        if (index < 0) index += currentSize +1; // roll around
451        Validate.isTrue(index >= 0 && index <= currentSize, "Insert position out of bounds.");
452
453        ArrayList<Node> nodes = new ArrayList<>(children);
454        Node[] nodeArray = nodes.toArray(new Node[nodes.size()]);
455        addChildren(index, nodeArray);
456        return this;
457    }
458
459    /**
460     * Inserts the given child nodes into this element at the specified index. Current nodes will be shifted to the
461     * right. The inserted nodes will be moved from their current parent. To prevent moving, copy the nodes first.
462     *
463     * @param index 0-based index to insert children at. Specify {@code 0} to insert at the start, {@code -1} at the
464     * end
465     * @param children child nodes to insert
466     * @return this element, for chaining.
467     */
468    public Element insertChildren(int index, Node... children) {
469        Validate.notNull(children, "Children collection to be inserted must not be null.");
470        int currentSize = childNodeSize();
471        if (index < 0) index += currentSize +1; // roll around
472        Validate.isTrue(index >= 0 && index <= currentSize, "Insert position out of bounds.");
473
474        addChildren(index, children);
475        return this;
476    }
477    
478    /**
479     * Create a new element by tag name, and add it as the last child.
480     * 
481     * @param tagName the name of the tag (e.g. {@code div}).
482     * @return the new element, to allow you to add content to it, e.g.:
483     *  {@code parent.appendElement("h1").attr("id", "header").text("Welcome");}
484     */
485    public Element appendElement(String tagName) {
486        Element child = new Element(Tag.valueOf(tagName), baseUri());
487        appendChild(child);
488        return child;
489    }
490    
491    /**
492     * Create a new element by tag name, and add it as the first child.
493     * 
494     * @param tagName the name of the tag (e.g. {@code div}).
495     * @return the new element, to allow you to add content to it, e.g.:
496     *  {@code parent.prependElement("h1").attr("id", "header").text("Welcome");}
497     */
498    public Element prependElement(String tagName) {
499        Element child = new Element(Tag.valueOf(tagName), baseUri());
500        prependChild(child);
501        return child;
502    }
503    
504    /**
505     * Create and append a new TextNode to this element.
506     * 
507     * @param text the unencoded text to add
508     * @return this element
509     */
510    public Element appendText(String text) {
511        Validate.notNull(text);
512        TextNode node = new TextNode(text);
513        appendChild(node);
514        return this;
515    }
516    
517    /**
518     * Create and prepend a new TextNode to this element.
519     * 
520     * @param text the unencoded text to add
521     * @return this element
522     */
523    public Element prependText(String text) {
524        Validate.notNull(text);
525        TextNode node = new TextNode(text);
526        prependChild(node);
527        return this;
528    }
529    
530    /**
531     * Add inner HTML to this element. The supplied HTML will be parsed, and each node appended to the end of the children.
532     * @param html HTML to add inside this element, after the existing HTML
533     * @return this element
534     * @see #html(String)
535     */
536    public Element append(String html) {
537        Validate.notNull(html);
538
539        List<Node> nodes = Parser.parseFragment(html, this, baseUri());
540        addChildren(nodes.toArray(new Node[nodes.size()]));
541        return this;
542    }
543    
544    /**
545     * Add inner HTML into this element. The supplied HTML will be parsed, and each node prepended to the start of the element's children.
546     * @param html HTML to add inside this element, before the existing HTML
547     * @return this element
548     * @see #html(String)
549     */
550    public Element prepend(String html) {
551        Validate.notNull(html);
552        
553        List<Node> nodes = Parser.parseFragment(html, this, baseUri());
554        addChildren(0, nodes.toArray(new Node[nodes.size()]));
555        return this;
556    }
557
558    /**
559     * Insert the specified HTML into the DOM before this element (as a preceding sibling).
560     *
561     * @param html HTML to add before this element
562     * @return this element, for chaining
563     * @see #after(String)
564     */
565    @Override
566    public Element before(String html) {
567        return (Element) super.before(html);
568    }
569
570    /**
571     * Insert the specified node into the DOM before this node (as a preceding sibling).
572     * @param node to add before this element
573     * @return this Element, for chaining
574     * @see #after(Node)
575     */
576    @Override
577    public Element before(Node node) {
578        return (Element) super.before(node);
579    }
580
581    /**
582     * Insert the specified HTML into the DOM after this element (as a following sibling).
583     *
584     * @param html HTML to add after this element
585     * @return this element, for chaining
586     * @see #before(String)
587     */
588    @Override
589    public Element after(String html) {
590        return (Element) super.after(html);
591    }
592
593    /**
594     * Insert the specified node into the DOM after this node (as a following sibling).
595     * @param node to add after this element
596     * @return this element, for chaining
597     * @see #before(Node)
598     */
599    @Override
600    public Element after(Node node) {
601        return (Element) super.after(node);
602    }
603
604    /**
605     * Remove all of the element's child nodes. Any attributes are left as-is.
606     * @return this element
607     */
608    public Element empty() {
609        childNodes.clear();
610        return this;
611    }
612
613    /**
614     * Wrap the supplied HTML around this element.
615     *
616     * @param html HTML to wrap around this element, e.g. {@code <div class="head"></div>}. Can be arbitrarily deep.
617     * @return this element, for chaining.
618     */
619    @Override
620    public Element wrap(String html) {
621        return (Element) super.wrap(html);
622    }
623
624    /**
625     * Get a CSS selector that will uniquely select this element.
626     * <p>
627     * If the element has an ID, returns #id;
628     * otherwise returns the parent (if any) CSS selector, followed by {@literal '>'},
629     * followed by a unique selector for the element (tag.class.class:nth-child(n)).
630     * </p>
631     *
632     * @return the CSS Path that can be used to retrieve the element in a selector.
633     */
634    public String cssSelector() {
635        if (id().length() > 0)
636            return "#" + id();
637
638        // Translate HTML namespace ns:tag to CSS namespace syntax ns|tag
639        String tagName = tagName().replace(':', '|');
640        StringBuilder selector = new StringBuilder(tagName);
641        String classes = StringUtil.join(classNames(), ".");
642        if (classes.length() > 0)
643            selector.append('.').append(classes);
644
645        if (parent() == null || parent() instanceof Document) // don't add Document to selector, as will always have a html node
646            return selector.toString();
647
648        selector.insert(0, " > ");
649        if (parent().select(selector.toString()).size() > 1)
650            selector.append(String.format(
651                ":nth-child(%d)", elementSiblingIndex() + 1));
652
653        return parent().cssSelector() + selector.toString();
654    }
655
656    /**
657     * Get sibling elements. If the element has no sibling elements, returns an empty list. An element is not a sibling
658     * of itself, so will not be included in the returned list.
659     * @return sibling elements
660     */
661    public Elements siblingElements() {
662        if (parentNode == null)
663            return new Elements(0);
664
665        List<Element> elements = parent().childElementsList();
666        Elements siblings = new Elements(elements.size() - 1);
667        for (Element el: elements)
668            if (el != this)
669                siblings.add(el);
670        return siblings;
671    }
672
673    /**
674     * Gets the next sibling element of this element. E.g., if a {@code div} contains two {@code p}s, 
675     * the {@code nextElementSibling} of the first {@code p} is the second {@code p}.
676     * <p>
677     * This is similar to {@link #nextSibling()}, but specifically finds only Elements
678     * </p>
679     * @return the next element, or null if there is no next element
680     * @see #previousElementSibling()
681     */
682    public Element nextElementSibling() {
683        if (parentNode == null) return null;
684        List<Element> siblings = parent().childElementsList();
685        Integer index = indexInList(this, siblings);
686        Validate.notNull(index);
687        if (siblings.size() > index+1)
688            return siblings.get(index+1);
689        else
690            return null;
691    }
692
693    /**
694     * Gets the previous element sibling of this element.
695     * @return the previous element, or null if there is no previous element
696     * @see #nextElementSibling()
697     */
698    public Element previousElementSibling() {
699        if (parentNode == null) return null;
700        List<Element> siblings = parent().childElementsList();
701        Integer index = indexInList(this, siblings);
702        Validate.notNull(index);
703        if (index > 0)
704            return siblings.get(index-1);
705        else
706            return null;
707    }
708
709    /**
710     * Gets the first element sibling of this element.
711     * @return the first sibling that is an element (aka the parent's first element child) 
712     */
713    public Element firstElementSibling() {
714        // todo: should firstSibling() exclude this?
715        List<Element> siblings = parent().childElementsList();
716        return siblings.size() > 1 ? siblings.get(0) : null;
717    }
718    
719    /**
720     * Get the list index of this element in its element sibling list. I.e. if this is the first element
721     * sibling, returns 0.
722     * @return position in element sibling list
723     */
724    public int elementSiblingIndex() {
725       if (parent() == null) return 0;
726       return indexInList(this, parent().childElementsList());
727    }
728
729    /**
730     * Gets the last element sibling of this element
731     * @return the last sibling that is an element (aka the parent's last element child) 
732     */
733    public Element lastElementSibling() {
734        List<Element> siblings = parent().childElementsList();
735        return siblings.size() > 1 ? siblings.get(siblings.size() - 1) : null;
736    }
737
738    private static <E extends Element> int indexInList(Element search, List<E> elements) {
739        for (int i = 0; i < elements.size(); i++) {
740            if (elements.get(i) == search)
741                return i;
742        }
743        return 0;
744    }
745
746    // DOM type methods
747
748    /**
749     * Finds elements, including and recursively under this element, with the specified tag name.
750     * @param tagName The tag name to search for (case insensitively).
751     * @return a matching unmodifiable list of elements. Will be empty if this element and none of its children match.
752     */
753    public Elements getElementsByTag(String tagName) {
754        Validate.notEmpty(tagName);
755        tagName = normalize(tagName);
756
757        return Collector.collect(new Evaluator.Tag(tagName), this);
758    }
759
760    /**
761     * Find an element by ID, including or under this element.
762     * <p>
763     * Note that this finds the first matching ID, starting with this element. If you search down from a different
764     * starting point, it is possible to find a different element by ID. For unique element by ID within a Document,
765     * use {@link Document#getElementById(String)}
766     * @param id The ID to search for.
767     * @return The first matching element by ID, starting with this element, or null if none found.
768     */
769    public Element getElementById(String id) {
770        Validate.notEmpty(id);
771        
772        Elements elements = Collector.collect(new Evaluator.Id(id), this);
773        if (elements.size() > 0)
774            return elements.get(0);
775        else
776            return null;
777    }
778
779    /**
780     * Find elements that have this class, including or under this element. Case insensitive.
781     * <p>
782     * Elements can have multiple classes (e.g. {@code <div class="header round first">}. This method
783     * checks each class, so you can find the above with {@code el.getElementsByClass("header");}.
784     * 
785     * @param className the name of the class to search for.
786     * @return elements with the supplied class name, empty if none
787     * @see #hasClass(String)
788     * @see #classNames()
789     */
790    public Elements getElementsByClass(String className) {
791        Validate.notEmpty(className);
792
793        return Collector.collect(new Evaluator.Class(className), this);
794    }
795
796    /**
797     * Find elements that have a named attribute set. Case insensitive.
798     *
799     * @param key name of the attribute, e.g. {@code href}
800     * @return elements that have this attribute, empty if none
801     */
802    public Elements getElementsByAttribute(String key) {
803        Validate.notEmpty(key);
804        key = key.trim();
805
806        return Collector.collect(new Evaluator.Attribute(key), this);
807    }
808
809    /**
810     * Find elements that have an attribute name starting with the supplied prefix. Use {@code data-} to find elements
811     * that have HTML5 datasets.
812     * @param keyPrefix name prefix of the attribute e.g. {@code data-}
813     * @return elements that have attribute names that start with with the prefix, empty if none.
814     */
815    public Elements getElementsByAttributeStarting(String keyPrefix) {
816        Validate.notEmpty(keyPrefix);
817        keyPrefix = keyPrefix.trim();
818
819        return Collector.collect(new Evaluator.AttributeStarting(keyPrefix), this);
820    }
821
822    /**
823     * Find elements that have an attribute with the specific value. Case insensitive.
824     * 
825     * @param key name of the attribute
826     * @param value value of the attribute
827     * @return elements that have this attribute with this value, empty if none
828     */
829    public Elements getElementsByAttributeValue(String key, String value) {
830        return Collector.collect(new Evaluator.AttributeWithValue(key, value), this);
831    }
832
833    /**
834     * Find elements that either do not have this attribute, or have it with a different value. Case insensitive.
835     * 
836     * @param key name of the attribute
837     * @param value value of the attribute
838     * @return elements that do not have a matching attribute
839     */
840    public Elements getElementsByAttributeValueNot(String key, String value) {
841        return Collector.collect(new Evaluator.AttributeWithValueNot(key, value), this);
842    }
843
844    /**
845     * Find elements that have attributes that start with the value prefix. Case insensitive.
846     * 
847     * @param key name of the attribute
848     * @param valuePrefix start of attribute value
849     * @return elements that have attributes that start with the value prefix
850     */
851    public Elements getElementsByAttributeValueStarting(String key, String valuePrefix) {
852        return Collector.collect(new Evaluator.AttributeWithValueStarting(key, valuePrefix), this);
853    }
854
855    /**
856     * Find elements that have attributes that end with the value suffix. Case insensitive.
857     * 
858     * @param key name of the attribute
859     * @param valueSuffix end of the attribute value
860     * @return elements that have attributes that end with the value suffix
861     */
862    public Elements getElementsByAttributeValueEnding(String key, String valueSuffix) {
863        return Collector.collect(new Evaluator.AttributeWithValueEnding(key, valueSuffix), this);
864    }
865
866    /**
867     * Find elements that have attributes whose value contains the match string. Case insensitive.
868     * 
869     * @param key name of the attribute
870     * @param match substring of value to search for
871     * @return elements that have attributes containing this text
872     */
873    public Elements getElementsByAttributeValueContaining(String key, String match) {
874        return Collector.collect(new Evaluator.AttributeWithValueContaining(key, match), this);
875    }
876    
877    /**
878     * Find elements that have attributes whose values match the supplied regular expression.
879     * @param key name of the attribute
880     * @param pattern compiled regular expression to match against attribute values
881     * @return elements that have attributes matching this regular expression
882     */
883    public Elements getElementsByAttributeValueMatching(String key, Pattern pattern) {
884        return Collector.collect(new Evaluator.AttributeWithValueMatching(key, pattern), this);
885        
886    }
887    
888    /**
889     * Find elements that have attributes whose values match the supplied regular expression.
890     * @param key name of the attribute
891     * @param regex regular expression to match against attribute values. You can use <a href="http://java.sun.com/docs/books/tutorial/essential/regex/pattern.html#embedded">embedded flags</a> (such as (?i) and (?m) to control regex options.
892     * @return elements that have attributes matching this regular expression
893     */
894    public Elements getElementsByAttributeValueMatching(String key, String regex) {
895        Pattern pattern;
896        try {
897            pattern = Pattern.compile(regex);
898        } catch (PatternSyntaxException e) {
899            throw new IllegalArgumentException("Pattern syntax error: " + regex, e);
900        }
901        return getElementsByAttributeValueMatching(key, pattern);
902    }
903    
904    /**
905     * Find elements whose sibling index is less than the supplied index.
906     * @param index 0-based index
907     * @return elements less than index
908     */
909    public Elements getElementsByIndexLessThan(int index) {
910        return Collector.collect(new Evaluator.IndexLessThan(index), this);
911    }
912    
913    /**
914     * Find elements whose sibling index is greater than the supplied index.
915     * @param index 0-based index
916     * @return elements greater than index
917     */
918    public Elements getElementsByIndexGreaterThan(int index) {
919        return Collector.collect(new Evaluator.IndexGreaterThan(index), this);
920    }
921    
922    /**
923     * Find elements whose sibling index is equal to the supplied index.
924     * @param index 0-based index
925     * @return elements equal to index
926     */
927    public Elements getElementsByIndexEquals(int index) {
928        return Collector.collect(new Evaluator.IndexEquals(index), this);
929    }
930    
931    /**
932     * Find elements that contain the specified string. The search is case insensitive. The text may appear directly
933     * in the element, or in any of its descendants.
934     * @param searchText to look for in the element's text
935     * @return elements that contain the string, case insensitive.
936     * @see Element#text()
937     */
938    public Elements getElementsContainingText(String searchText) {
939        return Collector.collect(new Evaluator.ContainsText(searchText), this);
940    }
941    
942    /**
943     * Find elements that directly contain the specified string. The search is case insensitive. The text must appear directly
944     * in the element, not in any of its descendants.
945     * @param searchText to look for in the element's own text
946     * @return elements that contain the string, case insensitive.
947     * @see Element#ownText()
948     */
949    public Elements getElementsContainingOwnText(String searchText) {
950        return Collector.collect(new Evaluator.ContainsOwnText(searchText), this);
951    }
952    
953    /**
954     * Find elements whose text matches the supplied regular expression.
955     * @param pattern regular expression to match text against
956     * @return elements matching the supplied regular expression.
957     * @see Element#text()
958     */
959    public Elements getElementsMatchingText(Pattern pattern) {
960        return Collector.collect(new Evaluator.Matches(pattern), this);
961    }
962    
963    /**
964     * Find elements whose text matches the supplied regular expression.
965     * @param regex regular expression to match text against. You can use <a href="http://java.sun.com/docs/books/tutorial/essential/regex/pattern.html#embedded">embedded flags</a> (such as (?i) and (?m) to control regex options.
966     * @return elements matching the supplied regular expression.
967     * @see Element#text()
968     */
969    public Elements getElementsMatchingText(String regex) {
970        Pattern pattern;
971        try {
972            pattern = Pattern.compile(regex);
973        } catch (PatternSyntaxException e) {
974            throw new IllegalArgumentException("Pattern syntax error: " + regex, e);
975        }
976        return getElementsMatchingText(pattern);
977    }
978    
979    /**
980     * Find elements whose own text matches the supplied regular expression.
981     * @param pattern regular expression to match text against
982     * @return elements matching the supplied regular expression.
983     * @see Element#ownText()
984     */
985    public Elements getElementsMatchingOwnText(Pattern pattern) {
986        return Collector.collect(new Evaluator.MatchesOwn(pattern), this);
987    }
988    
989    /**
990     * Find elements whose text matches the supplied regular expression.
991     * @param regex regular expression to match text against. You can use <a href="http://java.sun.com/docs/books/tutorial/essential/regex/pattern.html#embedded">embedded flags</a> (such as (?i) and (?m) to control regex options.
992     * @return elements matching the supplied regular expression.
993     * @see Element#ownText()
994     */
995    public Elements getElementsMatchingOwnText(String regex) {
996        Pattern pattern;
997        try {
998            pattern = Pattern.compile(regex);
999        } catch (PatternSyntaxException e) {
1000            throw new IllegalArgumentException("Pattern syntax error: " + regex, e);
1001        }
1002        return getElementsMatchingOwnText(pattern);
1003    }
1004    
1005    /**
1006     * Find all elements under this element (including self, and children of children).
1007     * 
1008     * @return all elements
1009     */
1010    public Elements getAllElements() {
1011        return Collector.collect(new Evaluator.AllElements(), this);
1012    }
1013
1014    /**
1015     * Gets the combined text of this element and all its children. Whitespace is normalized and trimmed.
1016     * <p>
1017     * For example, given HTML {@code <p>Hello  <b>there</b> now! </p>}, {@code p.text()} returns {@code "Hello there now!"}
1018     *
1019     * @return unencoded text, or empty string if none.
1020     * @see #ownText()
1021     * @see #textNodes()
1022     */
1023    public String text() {
1024        final StringBuilder accum = new StringBuilder();
1025        NodeTraversor.traverse(new NodeVisitor() {
1026            public void head(Node node, int depth) {
1027                if (node instanceof TextNode) {
1028                    TextNode textNode = (TextNode) node;
1029                    appendNormalisedText(accum, textNode);
1030                } else if (node instanceof Element) {
1031                    Element element = (Element) node;
1032                    if (accum.length() > 0 &&
1033                        (element.isBlock() || element.tag.getName().equals("br")) &&
1034                        !TextNode.lastCharIsWhitespace(accum))
1035                        accum.append(' ');
1036                }
1037            }
1038
1039            public void tail(Node node, int depth) {
1040            }
1041        }, this);
1042        return accum.toString().trim();
1043    }
1044
1045    /**
1046     * Gets the text owned by this element only; does not get the combined text of all children.
1047     * <p>
1048     * For example, given HTML {@code <p>Hello <b>there</b> now!</p>}, {@code p.ownText()} returns {@code "Hello now!"},
1049     * whereas {@code p.text()} returns {@code "Hello there now!"}.
1050     * Note that the text within the {@code b} element is not returned, as it is not a direct child of the {@code p} element.
1051     *
1052     * @return unencoded text, or empty string if none.
1053     * @see #text()
1054     * @see #textNodes()
1055     */
1056    public String ownText() {
1057        StringBuilder sb = new StringBuilder();
1058        ownText(sb);
1059        return sb.toString().trim();
1060    }
1061
1062    private void ownText(StringBuilder accum) {
1063        for (Node child : childNodes) {
1064            if (child instanceof TextNode) {
1065                TextNode textNode = (TextNode) child;
1066                appendNormalisedText(accum, textNode);
1067            } else if (child instanceof Element) {
1068                appendWhitespaceIfBr((Element) child, accum);
1069            }
1070        }
1071    }
1072
1073    private static void appendNormalisedText(StringBuilder accum, TextNode textNode) {
1074        String text = textNode.getWholeText();
1075
1076        if (preserveWhitespace(textNode.parentNode))
1077            accum.append(text);
1078        else
1079            StringUtil.appendNormalisedWhitespace(accum, text, TextNode.lastCharIsWhitespace(accum));
1080    }
1081
1082    private static void appendWhitespaceIfBr(Element element, StringBuilder accum) {
1083        if (element.tag.getName().equals("br") && !TextNode.lastCharIsWhitespace(accum))
1084            accum.append(" ");
1085    }
1086
1087    static boolean preserveWhitespace(Node node) {
1088        // looks only at this element and one level up, to prevent recursion & needless stack searches
1089        if (node != null && node instanceof Element) {
1090            Element element = (Element) node;
1091            return element.tag.preserveWhitespace() ||
1092                element.parent() != null && element.parent().tag.preserveWhitespace();
1093        }
1094        return false;
1095    }
1096
1097    /**
1098     * Set the text of this element. Any existing contents (text or elements) will be cleared
1099     * @param text unencoded text
1100     * @return this element
1101     */
1102    public Element text(String text) {
1103        Validate.notNull(text);
1104
1105        empty();
1106        TextNode textNode = new TextNode(text);
1107        appendChild(textNode);
1108
1109        return this;
1110    }
1111
1112    /**
1113     Test if this element has any text content (that is not just whitespace).
1114     @return true if element has non-blank text content.
1115     */
1116    public boolean hasText() {
1117        for (Node child: childNodes) {
1118            if (child instanceof TextNode) {
1119                TextNode textNode = (TextNode) child;
1120                if (!textNode.isBlank())
1121                    return true;
1122            } else if (child instanceof Element) {
1123                Element el = (Element) child;
1124                if (el.hasText())
1125                    return true;
1126            }
1127        }
1128        return false;
1129    }
1130
1131    /**
1132     * Get the combined data of this element. Data is e.g. the inside of a {@code script} tag. Note that data is NOT the
1133     * text of the element. Use {@link #text()} to get the text that would be visible to a user, and {@link #data()}
1134     * for the contents of scripts, comments, CSS styles, etc.
1135     *
1136     * @return the data, or empty string if none
1137     *
1138     * @see #dataNodes()
1139     */
1140    public String data() {
1141        StringBuilder sb = new StringBuilder();
1142
1143        for (Node childNode : childNodes) {
1144            if (childNode instanceof DataNode) {
1145                DataNode data = (DataNode) childNode;
1146                sb.append(data.getWholeData());
1147            } else if (childNode instanceof Comment) {
1148                Comment comment = (Comment) childNode;
1149                sb.append(comment.getData());
1150            } else if (childNode instanceof Element) {
1151                Element element = (Element) childNode;
1152                String elementData = element.data();
1153                sb.append(elementData);
1154            }
1155        }
1156        return sb.toString();
1157    }   
1158
1159    /**
1160     * Gets the literal value of this element's "class" attribute, which may include multiple class names, space
1161     * separated. (E.g. on <code>&lt;div class="header gray"&gt;</code> returns, "<code>header gray</code>")
1162     * @return The literal class attribute, or <b>empty string</b> if no class attribute set.
1163     */
1164    public String className() {
1165        return attr("class").trim();
1166    }
1167
1168    /**
1169     * Get all of the element's class names. E.g. on element {@code <div class="header gray">},
1170     * returns a set of two elements {@code "header", "gray"}. Note that modifications to this set are not pushed to
1171     * the backing {@code class} attribute; use the {@link #classNames(java.util.Set)} method to persist them.
1172     * @return set of classnames, empty if no class attribute
1173     */
1174    public Set<String> classNames() {
1175        String[] names = classSplit.split(className());
1176        Set<String> classNames = new LinkedHashSet<>(Arrays.asList(names));
1177        classNames.remove(""); // if classNames() was empty, would include an empty class
1178
1179        return classNames;
1180    }
1181
1182    /**
1183     Set the element's {@code class} attribute to the supplied class names.
1184     @param classNames set of classes
1185     @return this element, for chaining
1186     */
1187    public Element classNames(Set<String> classNames) {
1188        Validate.notNull(classNames);
1189        attributes().put("class", StringUtil.join(classNames, " "));
1190        return this;
1191    }
1192
    /**
     * Tests if this element has a class. Case insensitive.
     * <p>
     * The {@code class} attribute value is scanned in place: each whitespace-delimited name is compared against
     * {@code className} by region, ignoring case, so the attribute is never split into separate strings.
     *
     * @param className name of class to check for
     * @return true if it does, false if not
     */
    // performance sensitive
    public boolean hasClass(String className) {
        final String classAttr = attributes().getIgnoreCase("class");
        final int len = classAttr.length();
        final int wantLen = className.length();

        // an empty attribute, or one shorter than the wanted name, cannot contain it
        if (len == 0 || len < wantLen) {
            return false;
        }

        // if both lengths are equal, only need compare the className with the attribute
        if (len == wantLen) {
            return className.equalsIgnoreCase(classAttr);
        }

        // otherwise, scan for whitespace and compare regions (with no string or arraylist allocations)
        boolean inClass = false; // true while the scan is inside a class name
        int start = 0;           // index where the current class name began
        for (int i = 0; i < len; i++) {
            if (Character.isWhitespace(classAttr.charAt(i))) {
                if (inClass) {
                    // white space ends a class name, compare it with the requested one, ignore case
                    if (i - start == wantLen && classAttr.regionMatches(true, start, className, 0, wantLen)) {
                        return true;
                    }
                    inClass = false;
                }
            } else {
                if (!inClass) {
                    // we're in a class name : keep the start of the substring
                    inClass = true;
                    start = i;
                }
            }
        }

        // check the last entry (a name that runs to the end of the attribute has no closing whitespace)
        if (inClass && len - start == wantLen) {
            return classAttr.regionMatches(true, start, className, 0, wantLen);
        }

        return false;
    }
1241
1242    /**
1243     Add a class name to this element's {@code class} attribute.
1244     @param className class name to add
1245     @return this element
1246     */
1247    public Element addClass(String className) {
1248        Validate.notNull(className);
1249
1250        Set<String> classes = classNames();
1251        classes.add(className);
1252        classNames(classes);
1253
1254        return this;
1255    }
1256
1257    /**
1258     Remove a class name from this element's {@code class} attribute.
1259     @param className class name to remove
1260     @return this element
1261     */
1262    public Element removeClass(String className) {
1263        Validate.notNull(className);
1264
1265        Set<String> classes = classNames();
1266        classes.remove(className);
1267        classNames(classes);
1268
1269        return this;
1270    }
1271
1272    /**
1273     Toggle a class name on this element's {@code class} attribute: if present, remove it; otherwise add it.
1274     @param className class name to toggle
1275     @return this element
1276     */
1277    public Element toggleClass(String className) {
1278        Validate.notNull(className);
1279
1280        Set<String> classes = classNames();
1281        if (classes.contains(className))
1282            classes.remove(className);
1283        else
1284            classes.add(className);
1285        classNames(classes);
1286
1287        return this;
1288    }
1289    
1290    /**
1291     * Get the value of a form element (input, textarea, etc).
1292     * @return the value of the form element, or empty string if not set.
1293     */
1294    public String val() {
1295        if (tagName().equals("textarea"))
1296            return text();
1297        else
1298            return attr("value");
1299    }
1300    
1301    /**
1302     * Set the value of a form element (input, textarea, etc).
1303     * @param value value to set
1304     * @return this element (for chaining)
1305     */
1306    public Element val(String value) {
1307        if (tagName().equals("textarea"))
1308            text(value);
1309        else
1310            attr("value", value);
1311        return this;
1312    }
1313
1314    void outerHtmlHead(final Appendable accum, int depth, final Document.OutputSettings out) throws IOException {
1315        if (out.prettyPrint() && (tag.formatAsBlock() || (parent() != null && parent().tag().formatAsBlock()) || out.outline())) {
1316            if (accum instanceof StringBuilder) {
1317                if (((StringBuilder) accum).length() > 0)
1318                    indent(accum, depth, out);
1319            } else {
1320                indent(accum, depth, out);
1321            }
1322        }
1323        accum.append('<').append(tagName());
1324        if (attributes != null) attributes.html(accum, out);
1325
1326        // selfclosing includes unknown tags, isEmpty defines tags that are always empty
1327        if (childNodes.isEmpty() && tag.isSelfClosing()) {
1328            if (out.syntax() == Document.OutputSettings.Syntax.html && tag.isEmpty())
1329                accum.append('>');
1330            else
1331                accum.append(" />"); // <img> in html, <img /> in xml
1332        }
1333        else
1334            accum.append('>');
1335    }
1336
1337        void outerHtmlTail(Appendable accum, int depth, Document.OutputSettings out) throws IOException {
1338        if (!(childNodes.isEmpty() && tag.isSelfClosing())) {
1339            if (out.prettyPrint() && (!childNodes.isEmpty() && (
1340                    tag.formatAsBlock() || (out.outline() && (childNodes.size()>1 || (childNodes.size()==1 && !(childNodes.get(0) instanceof TextNode))))
1341            )))
1342                indent(accum, depth, out);
1343            accum.append("</").append(tagName()).append('>');
1344        }
1345    }
1346
1347    /**
1348     * Retrieves the element's inner HTML. E.g. on a {@code <div>} with one empty {@code <p>}, would return
1349     * {@code <p></p>}. (Whereas {@link #outerHtml()} would return {@code <div><p></p></div>}.)
1350     * 
1351     * @return String of HTML.
1352     * @see #outerHtml()
1353     */
1354    public String html() {
1355        StringBuilder accum = StringUtil.stringBuilder();
1356        html(accum);
1357        return getOutputSettings().prettyPrint() ? accum.toString().trim() : accum.toString();
1358    }
1359
1360    private void html(StringBuilder accum) {
1361        for (Node node : childNodes)
1362            node.outerHtml(accum);
1363    }
1364
1365    /**
1366     * {@inheritDoc}
1367     */
1368    @Override
1369    public <T extends Appendable> T html(T appendable) {
1370        for (Node node : childNodes)
1371            node.outerHtml(appendable);
1372
1373        return appendable;
1374    }
1375    
    /**
     * Set this element's inner HTML. Clears the existing HTML first, then appends the supplied HTML via
     * {@link #append(String)}.
     * @param html HTML to parse and set into this element
     * @return this element
     * @see #append(String)
     */
    public Element html(String html) {
        // drop the current children before adding the new content
        empty();
        append(html);
        return this;
    }
1387
1388        public String toString() {
1389        return outerHtml();
1390    }
1391
    /**
     * Creates a copy of this element by delegating to the superclass {@code clone()}, narrowing the return type
     * to {@code Element}.
     *
     * @return the cloned element
     */
    @Override
    public Element clone() {
        // the superclass clone of an Element is cast back to Element here
        return (Element) super.clone();
    }
1396
1397    @Override
1398    protected Element doClone(Node parent) {
1399        Element clone = (Element) super.doClone(parent);
1400        clone.attributes = attributes != null ? attributes.clone() : null;
1401        clone.baseUri = baseUri;
1402        clone.childNodes = new NodeList(clone, childNodes.size());
1403        clone.childNodes.addAll(childNodes);
1404
1405        return clone;
1406    }
1407
1408    private static final class NodeList extends ChangeNotifyingArrayList<Node> {
1409        private final Element owner;
1410
1411        NodeList(Element owner, int initialCapacity) {
1412            super(initialCapacity);
1413            this.owner = owner;
1414        }
1415
1416        public void onContentsChanged() {
1417            owner.nodelistChanged();
1418        }
1419    }
1420}