001package org.jsoup.nodes;
002
003import org.jsoup.SerializationException;
004import org.jsoup.helper.StringUtil;
005import org.jsoup.helper.Validate;
006import org.jsoup.parser.Parser;
007import org.jsoup.select.NodeFilter;
008import org.jsoup.select.NodeTraversor;
009import org.jsoup.select.NodeVisitor;
010
011import java.io.IOException;
012import java.util.*;
013
014/**
015 The base, abstract Node model. Elements, Documents, Comments etc are all Node instances.
016
017 @author Jonathan Hedley, jonathan@hedley.net */
018public abstract class Node implements Cloneable {
019    static final String EmptyString = "";
020    Node parentNode;
021    int siblingIndex;
022
023    /**
024     * Default constructor. Doesn't setup base uri, children, or attributes; use with caution.
025     */
026    protected Node() {
027    }
028
029    /**
030     Get the node name of this node. Use for debugging purposes and not logic switching (for that, use instanceof).
031     @return node name
032     */
033    public abstract String nodeName();
034
035    /**
036     * Check if this Node has an actual Attributes object.
037     */
038    protected abstract boolean hasAttributes();
039
040    public boolean hasParent() {
041        return parentNode != null;
042    }
043
044    /**
045     * Get an attribute's value by its key. <b>Case insensitive</b>
046     * <p>
047     * To get an absolute URL from an attribute that may be a relative URL, prefix the key with <code><b>abs</b></code>,
048     * which is a shortcut to the {@link #absUrl} method.
049     * </p>
050     * E.g.:
051     * <blockquote><code>String url = a.attr("abs:href");</code></blockquote>
052     *
053     * @param attributeKey The attribute key.
054     * @return The attribute, or empty string if not present (to avoid nulls).
055     * @see #attributes()
056     * @see #hasAttr(String)
057     * @see #absUrl(String)
058     */
059    public String attr(String attributeKey) {
060        Validate.notNull(attributeKey);
061        if (!hasAttributes())
062            return EmptyString;
063
064        String val = attributes().getIgnoreCase(attributeKey);
065        if (val.length() > 0)
066            return val;
067        else if (attributeKey.startsWith("abs:"))
068            return absUrl(attributeKey.substring("abs:".length()));
069        else return "";
070    }
071
072    /**
073     * Get all of the element's attributes.
074     * @return attributes (which implements iterable, in same order as presented in original HTML).
075     */
076    public abstract Attributes attributes();
077
078    /**
079     * Set an attribute (key=value). If the attribute already exists, it is replaced. The attribute key comparison is
080     * <b>case insensitive</b>.
081     * @param attributeKey The attribute key.
082     * @param attributeValue The attribute value.
083     * @return this (for chaining)
084     */
085    public Node attr(String attributeKey, String attributeValue) {
086        attributes().putIgnoreCase(attributeKey, attributeValue);
087        return this;
088    }
089
090    /**
091     * Test if this element has an attribute. <b>Case insensitive</b>
092     * @param attributeKey The attribute key to check.
093     * @return true if the attribute exists, false if not.
094     */
095    public boolean hasAttr(String attributeKey) {
096        Validate.notNull(attributeKey);
097
098        if (attributeKey.startsWith("abs:")) {
099            String key = attributeKey.substring("abs:".length());
100            if (attributes().hasKeyIgnoreCase(key) && !absUrl(key).equals(""))
101                return true;
102        }
103        return attributes().hasKeyIgnoreCase(attributeKey);
104    }
105
106    /**
107     * Remove an attribute from this element.
108     * @param attributeKey The attribute to remove.
109     * @return this (for chaining)
110     */
111    public Node removeAttr(String attributeKey) {
112        Validate.notNull(attributeKey);
113        attributes().removeIgnoreCase(attributeKey);
114        return this;
115    }
116
117    /**
118     * Clear (remove) all of the attributes in this node.
119     * @return this, for chaining
120     */
121    public Node clearAttributes() {
122        Iterator<Attribute> it = attributes().iterator();
123        while (it.hasNext()) {
124            it.next();
125            it.remove();
126        }
127        return this;
128    }
129
130    /**
131     Get the base URI of this node.
132     @return base URI
133     */
134    public abstract String baseUri();
135
136    /**
137     * Set the baseUri for just this node (not its descendants), if this Node tracks base URIs.
138     * @param baseUri new URI
139     */
140    protected abstract void doSetBaseUri(String baseUri);
141
142    /**
143     Update the base URI of this node and all of its descendants.
144     @param baseUri base URI to set
145     */
146    public void setBaseUri(final String baseUri) {
147        Validate.notNull(baseUri);
148
149        traverse(new NodeVisitor() {
150            public void head(Node node, int depth) {
151                node.doSetBaseUri(baseUri);
152            }
153
154            public void tail(Node node, int depth) {
155            }
156        });
157    }
158
159    /**
160     * Get an absolute URL from a URL attribute that may be relative (i.e. an <code>&lt;a href&gt;</code> or
161     * <code>&lt;img src&gt;</code>).
162     * <p>
163     * E.g.: <code>String absUrl = linkEl.absUrl("href");</code>
164     * </p>
165     * <p>
166     * If the attribute value is already absolute (i.e. it starts with a protocol, like
167     * <code>http://</code> or <code>https://</code> etc), and it successfully parses as a URL, the attribute is
168     * returned directly. Otherwise, it is treated as a URL relative to the element's {@link #baseUri}, and made
169     * absolute using that.
170     * </p>
171     * <p>
172     * As an alternate, you can use the {@link #attr} method with the <code>abs:</code> prefix, e.g.:
173     * <code>String absUrl = linkEl.attr("abs:href");</code>
174     * </p>
175     *
176     * @param attributeKey The attribute key
177     * @return An absolute URL if one could be made, or an empty string (not null) if the attribute was missing or
178     * could not be made successfully into a URL.
179     * @see #attr
180     * @see java.net.URL#URL(java.net.URL, String)
181     */
182    public String absUrl(String attributeKey) {
183        Validate.notEmpty(attributeKey);
184
185        if (!hasAttr(attributeKey)) {
186            return ""; // nothing to make absolute with
187        } else {
188            return StringUtil.resolve(baseUri(), attr(attributeKey));
189        }
190    }
191
192    protected abstract List<Node> ensureChildNodes();
193
194    /**
195     Get a child node by its 0-based index.
196     @param index index of child node
197     @return the child node at this index. Throws a {@code IndexOutOfBoundsException} if the index is out of bounds.
198     */
199    public Node childNode(int index) {
200        return ensureChildNodes().get(index);
201    }
202
203    /**
204     Get this node's children. Presented as an unmodifiable list: new children can not be added, but the child nodes
205     themselves can be manipulated.
206     @return list of children. If no children, returns an empty list.
207     */
208    public List<Node> childNodes() {
209        return Collections.unmodifiableList(ensureChildNodes());
210    }
211
212    /**
213     * Returns a deep copy of this node's children. Changes made to these nodes will not be reflected in the original
214     * nodes
215     * @return a deep copy of this node's children
216     */
217    public List<Node> childNodesCopy() {
218        final List<Node> nodes = ensureChildNodes();
219        final ArrayList<Node> children = new ArrayList<>(nodes.size());
220        for (Node node : nodes) {
221            children.add(node.clone());
222        }
223        return children;
224    }
225
226    /**
227     * Get the number of child nodes that this node holds.
228     * @return the number of child nodes that this node holds.
229     */
230    public abstract int childNodeSize();
231
232    protected Node[] childNodesAsArray() {
233        return ensureChildNodes().toArray(new Node[childNodeSize()]);
234    }
235
236    /**
237     Gets this node's parent node.
238     @return parent node; or null if no parent.
239     */
240    public Node parent() {
241        return parentNode;
242    }
243
244    /**
245     Gets this node's parent node. Not overridable by extending classes, so useful if you really just need the Node type.
246     @return parent node; or null if no parent.
247     */
248    public final Node parentNode() {
249        return parentNode;
250    }
251
252    /**
253     * Get this node's root node; that is, its topmost ancestor. If this node is the top ancestor, returns {@code this}.
254     * @return topmost ancestor.
255     */
256    public Node root() {
257        Node node = this;
258        while (node.parentNode != null)
259            node = node.parentNode;
260        return node;
261    }
262
263    /**
264     * Gets the Document associated with this Node.
265     * @return the Document associated with this Node, or null if there is no such Document.
266     */
267    public Document ownerDocument() {
268        Node root = root();
269        return (root instanceof Document) ? (Document) root : null;
270    }
271
272    /**
273     * Remove (delete) this node from the DOM tree. If this node has children, they are also removed.
274     */
275    public void remove() {
276        Validate.notNull(parentNode);
277        parentNode.removeChild(this);
278    }
279
280    /**
281     * Insert the specified HTML into the DOM before this node (i.e. as a preceding sibling).
282     * @param html HTML to add before this node
283     * @return this node, for chaining
284     * @see #after(String)
285     */
286    public Node before(String html) {
287        addSiblingHtml(siblingIndex, html);
288        return this;
289    }
290
291    /**
292     * Insert the specified node into the DOM before this node (i.e. as a preceding sibling).
293     * @param node to add before this node
294     * @return this node, for chaining
295     * @see #after(Node)
296     */
297    public Node before(Node node) {
298        Validate.notNull(node);
299        Validate.notNull(parentNode);
300
301        parentNode.addChildren(siblingIndex, node);
302        return this;
303    }
304
305    /**
306     * Insert the specified HTML into the DOM after this node (i.e. as a following sibling).
307     * @param html HTML to add after this node
308     * @return this node, for chaining
309     * @see #before(String)
310     */
311    public Node after(String html) {
312        addSiblingHtml(siblingIndex + 1, html);
313        return this;
314    }
315
316    /**
317     * Insert the specified node into the DOM after this node (i.e. as a following sibling).
318     * @param node to add after this node
319     * @return this node, for chaining
320     * @see #before(Node)
321     */
322    public Node after(Node node) {
323        Validate.notNull(node);
324        Validate.notNull(parentNode);
325
326        parentNode.addChildren(siblingIndex + 1, node);
327        return this;
328    }
329
330    private void addSiblingHtml(int index, String html) {
331        Validate.notNull(html);
332        Validate.notNull(parentNode);
333
334        Element context = parent() instanceof Element ? (Element) parent() : null;
335        List<Node> nodes = Parser.parseFragment(html, context, baseUri());
336        parentNode.addChildren(index, nodes.toArray(new Node[nodes.size()]));
337    }
338
339    /**
340     Wrap the supplied HTML around this node.
341     @param html HTML to wrap around this element, e.g. {@code <div class="head"></div>}. Can be arbitrarily deep.
342     @return this node, for chaining.
343     */
344    public Node wrap(String html) {
345        Validate.notEmpty(html);
346
347        Element context = parent() instanceof Element ? (Element) parent() : null;
348        List<Node> wrapChildren = Parser.parseFragment(html, context, baseUri());
349        Node wrapNode = wrapChildren.get(0);
350        if (wrapNode == null || !(wrapNode instanceof Element)) // nothing to wrap with; noop
351            return null;
352
353        Element wrap = (Element) wrapNode;
354        Element deepest = getDeepChild(wrap);
355        parentNode.replaceChild(this, wrap);
356        deepest.addChildren(this);
357
358        // remainder (unbalanced wrap, like <div></div><p></p> -- The <p> is remainder
359        if (wrapChildren.size() > 0) {
360            //noinspection ForLoopReplaceableByForEach (beacause it allocates an Iterator which is wasteful here)
361            for (int i = 0; i < wrapChildren.size(); i++) {
362                Node remainder = wrapChildren.get(i);
363                remainder.parentNode.removeChild(remainder);
364                wrap.appendChild(remainder);
365            }
366        }
367        return this;
368    }
369
370    /**
371     * Removes this node from the DOM, and moves its children up into the node's parent. This has the effect of dropping
372     * the node but keeping its children.
373     * <p>
374     * For example, with the input html:
375     * </p>
376     * <p>{@code <div>One <span>Two <b>Three</b></span></div>}</p>
377     * Calling {@code element.unwrap()} on the {@code span} element will result in the html:
378     * <p>{@code <div>One Two <b>Three</b></div>}</p>
379     * and the {@code "Two "} {@link TextNode} being returned.
380     *
381     * @return the first child of this node, after the node has been unwrapped. Null if the node had no children.
382     * @see #remove()
383     * @see #wrap(String)
384     */
385    public Node unwrap() {
386        Validate.notNull(parentNode);
387        final List<Node> childNodes = ensureChildNodes();
388        Node firstChild = childNodes.size() > 0 ? childNodes.get(0) : null;
389        parentNode.addChildren(siblingIndex, this.childNodesAsArray());
390        this.remove();
391
392        return firstChild;
393    }
394
395    private Element getDeepChild(Element el) {
396        List<Element> children = el.children();
397        if (children.size() > 0)
398            return getDeepChild(children.get(0));
399        else
400            return el;
401    }
402
403    void nodelistChanged() {
404        // Element overrides this to clear its shadow children elements
405    }
406
407    /**
408     * Replace this node in the DOM with the supplied node.
409     * @param in the node that will will replace the existing node.
410     */
411    public void replaceWith(Node in) {
412        Validate.notNull(in);
413        Validate.notNull(parentNode);
414        parentNode.replaceChild(this, in);
415    }
416
417    protected void setParentNode(Node parentNode) {
418        Validate.notNull(parentNode);
419        if (this.parentNode != null)
420            this.parentNode.removeChild(this);
421        this.parentNode = parentNode;
422    }
423
424    protected void replaceChild(Node out, Node in) {
425        Validate.isTrue(out.parentNode == this);
426        Validate.notNull(in);
427        if (in.parentNode != null)
428            in.parentNode.removeChild(in);
429
430        final int index = out.siblingIndex;
431        ensureChildNodes().set(index, in);
432        in.parentNode = this;
433        in.setSiblingIndex(index);
434        out.parentNode = null;
435    }
436
437    protected void removeChild(Node out) {
438        Validate.isTrue(out.parentNode == this);
439        final int index = out.siblingIndex;
440        ensureChildNodes().remove(index);
441        reindexChildren(index);
442        out.parentNode = null;
443    }
444
445    protected void addChildren(Node... children) {
446        //most used. short circuit addChildren(int), which hits reindex children and array copy
447        final List<Node> nodes = ensureChildNodes();
448
449        for (Node child: children) {
450            reparentChild(child);
451            nodes.add(child);
452            child.setSiblingIndex(nodes.size()-1);
453        }
454    }
455
456    protected void addChildren(int index, Node... children) {
457        Validate.noNullElements(children);
458        final List<Node> nodes = ensureChildNodes();
459
460        for (Node child : children) {
461            reparentChild(child);
462        }
463        nodes.addAll(index, Arrays.asList(children));
464        reindexChildren(index);
465    }
466    
467    protected void reparentChild(Node child) {
468        child.setParentNode(this);
469    }
470
471    private void reindexChildren(int start) {
472        final List<Node> childNodes = ensureChildNodes();
473
474        for (int i = start; i < childNodes.size(); i++) {
475            childNodes.get(i).setSiblingIndex(i);
476        }
477    }
478
479    /**
480     Retrieves this node's sibling nodes. Similar to {@link #childNodes()  node.parent.childNodes()}, but does not
481     include this node (a node is not a sibling of itself).
482     @return node siblings. If the node has no parent, returns an empty list.
483     */
484    public List<Node> siblingNodes() {
485        if (parentNode == null)
486            return Collections.emptyList();
487
488        List<Node> nodes = parentNode.ensureChildNodes();
489        List<Node> siblings = new ArrayList<>(nodes.size() - 1);
490        for (Node node: nodes)
491            if (node != this)
492                siblings.add(node);
493        return siblings;
494    }
495
496    /**
497     Get this node's next sibling.
498     @return next sibling, or null if this is the last sibling
499     */
500    public Node nextSibling() {
501        if (parentNode == null)
502            return null; // root
503
504        final List<Node> siblings = parentNode.ensureChildNodes();
505        final int index = siblingIndex+1;
506        if (siblings.size() > index)
507            return siblings.get(index);
508        else
509            return null;
510    }
511
512    /**
513     Get this node's previous sibling.
514     @return the previous sibling, or null if this is the first sibling
515     */
516    public Node previousSibling() {
517        if (parentNode == null)
518            return null; // root
519
520        if (siblingIndex > 0)
521            return parentNode.ensureChildNodes().get(siblingIndex-1);
522        else
523            return null;
524    }
525
526    /**
527     * Get the list index of this node in its node sibling list. I.e. if this is the first node
528     * sibling, returns 0.
529     * @return position in node sibling list
530     * @see org.jsoup.nodes.Element#elementSiblingIndex()
531     */
532    public int siblingIndex() {
533        return siblingIndex;
534    }
535
536    protected void setSiblingIndex(int siblingIndex) {
537        this.siblingIndex = siblingIndex;
538    }
539
540    /**
541     * Perform a depth-first traversal through this node and its descendants.
542     * @param nodeVisitor the visitor callbacks to perform on each node
543     * @return this node, for chaining
544     */
545    public Node traverse(NodeVisitor nodeVisitor) {
546        Validate.notNull(nodeVisitor);
547        NodeTraversor.traverse(nodeVisitor, this);
548        return this;
549    }
550
551    /**
552     * Perform a depth-first filtering through this node and its descendants.
553     * @param nodeFilter the filter callbacks to perform on each node
554     * @return this node, for chaining
555     */
556    public Node filter(NodeFilter nodeFilter) {
557        Validate.notNull(nodeFilter);
558        NodeTraversor.filter(nodeFilter, this);
559        return this;
560    }
561
562    /**
563     Get the outer HTML of this node.
564     @return HTML
565     */
566    public String outerHtml() {
567        StringBuilder accum = new StringBuilder(128);
568        outerHtml(accum);
569        return accum.toString();
570    }
571
572    protected void outerHtml(Appendable accum) {
573        NodeTraversor.traverse(new OuterHtmlVisitor(accum, getOutputSettings()), this);
574    }
575
576    // if this node has no document (or parent), retrieve the default output settings
577    Document.OutputSettings getOutputSettings() {
578        Document owner = ownerDocument();
579        return owner != null ? owner.outputSettings() : (new Document("")).outputSettings();
580    }
581
582    /**
583     Get the outer HTML of this node.
584     @param accum accumulator to place HTML into
585     @throws IOException if appending to the given accumulator fails.
586     */
587    abstract void outerHtmlHead(final Appendable accum, int depth, final Document.OutputSettings out) throws IOException;
588
589    abstract void outerHtmlTail(final Appendable accum, int depth, final Document.OutputSettings out) throws IOException;
590
591    /**
592     * Write this node and its children to the given {@link Appendable}.
593     *
594     * @param appendable the {@link Appendable} to write to.
595     * @return the supplied {@link Appendable}, for chaining.
596     */
597    public <T extends Appendable> T html(T appendable) {
598        outerHtml(appendable);
599        return appendable;
600    }
601
602        public String toString() {
603        return outerHtml();
604    }
605
606    protected void indent(Appendable accum, int depth, Document.OutputSettings out) throws IOException {
607        accum.append('\n').append(StringUtil.padding(depth * out.indentAmount()));
608    }
609
610    /**
611     * Check if this node is the same instance of another (object identity test).
612     * @param o other object to compare to
613     * @return true if the content of this node is the same as the other
614     * @see Node#hasSameValue(Object) to compare nodes by their value
615     */
616    @Override
617    public boolean equals(Object o) {
618        // implemented just so that javadoc is clear this is an identity test
619        return this == o;
620    }
621
622    /**
623     * Check if this node is has the same content as another node. A node is considered the same if its name, attributes and content match the
624     * other node; particularly its position in the tree does not influence its similarity.
625     * @param o other object to compare to
626     * @return true if the content of this node is the same as the other
627     */
628
629    public boolean hasSameValue(Object o) {
630        if (this == o) return true;
631        if (o == null || getClass() != o.getClass()) return false;
632
633        return this.outerHtml().equals(((Node) o).outerHtml());
634    }
635
636    /**
637     * Create a stand-alone, deep copy of this node, and all of its children. The cloned node will have no siblings or
638     * parent node. As a stand-alone object, any changes made to the clone or any of its children will not impact the
639     * original node.
640     * <p>
641     * The cloned node may be adopted into another Document or node structure using {@link Element#appendChild(Node)}.
642     * @return stand-alone cloned node
643     */
644    @Override
645    public Node clone() {
646        Node thisClone = doClone(null); // splits for orphan
647
648        // Queue up nodes that need their children cloned (BFS).
649        final LinkedList<Node> nodesToProcess = new LinkedList<>();
650        nodesToProcess.add(thisClone);
651
652        while (!nodesToProcess.isEmpty()) {
653            Node currParent = nodesToProcess.remove();
654
655            final int size = currParent.childNodeSize();
656            for (int i = 0; i < size; i++) {
657                final List<Node> childNodes = currParent.ensureChildNodes();
658                Node childClone = childNodes.get(i).doClone(currParent);
659                childNodes.set(i, childClone);
660                nodesToProcess.add(childClone);
661            }
662        }
663
664        return thisClone;
665    }
666
667    /*
668     * Return a clone of the node using the given parent (which can be null).
669     * Not a deep copy of children.
670     */
671    protected Node doClone(Node parent) {
672        Node clone;
673
674        try {
675            clone = (Node) super.clone();
676        } catch (CloneNotSupportedException e) {
677            throw new RuntimeException(e);
678        }
679
680        clone.parentNode = parent; // can be null, to create an orphan split
681        clone.siblingIndex = parent == null ? 0 : siblingIndex;
682
683        return clone;
684    }
685
686    private static class OuterHtmlVisitor implements NodeVisitor {
687        private Appendable accum;
688        private Document.OutputSettings out;
689
690        OuterHtmlVisitor(Appendable accum, Document.OutputSettings out) {
691            this.accum = accum;
692            this.out = out;
693            out.prepareEncoder();
694        }
695
696        public void head(Node node, int depth) {
697            try {
698                                node.outerHtmlHead(accum, depth, out);
699                        } catch (IOException exception) {
700                                throw new SerializationException(exception);
701                        }
702        }
703
704        public void tail(Node node, int depth) {
705            if (!node.nodeName().equals("#text")) { // saves a void hit.
706                                try {
707                                        node.outerHtmlTail(accum, depth, out);
708                                } catch (IOException exception) {
709                                        throw new SerializationException(exception);
710                                }
711            }
712        }
713    }
714}