package org.jsoup.nodes;

import org.jsoup.helper.StringUtil;
import org.jsoup.helper.Validate;

import java.io.IOException;

/**
 A text node.

 @author Jonathan Hedley, jonathan@hedley.net */
public class TextNode extends LeafNode {

    /**
     Create a new TextNode representing the supplied (unencoded) text.

     @param text raw text
     @see #createFromEncoded(String)
     */
    public TextNode(String text) {
        value = text;
    }

    /**
     Create a new TextNode representing the supplied (unencoded) text.

     @param text raw text
     @param baseUri base uri - ignored for this node type
     @see #createFromEncoded(String, String)
     @deprecated use {@link TextNode#TextNode(String)}
     */
    @Deprecated
    public TextNode(String text, String baseUri) {
        this(text);
    }

    /**
     Get the node name of this node.
     @return the constant {@code "#text"}
     */
    public String nodeName() {
        return "#text";
    }

    /**
     * Get the text content of this text node.
     * @return Unencoded, normalised text.
     * @see TextNode#getWholeText()
     */
    public String text() {
        return normaliseWhitespace(getWholeText());
    }

    /**
     * Set the text content of this text node.
     * @param text unencoded text
     * @return this, for chaining
     */
    public TextNode text(String text) {
        coreValue(text);
        return this;
    }

    /**
     Get the (unencoded) text of this text node, including any newlines and spaces present in the original.
     @return text
     */
    public String getWholeText() {
        return coreValue();
    }

    /**
     Test if this text node is blank -- that is, empty or only whitespace (including newlines).
     @return true if this text node is empty or only whitespace, false if it contains any text content.
     */
    public boolean isBlank() {
        return StringUtil.isBlank(coreValue());
    }

    /**
     * Split this text node into two nodes at the specified string offset. After splitting, this node will contain the
     * original text up to the offset, and will have a new text node sibling containing the text after the offset.
     * <p>
     * The offset is checked with {@link Validate#isTrue(boolean, String)}: it must be zero or greater, and strictly
     * less than the length of the current text (so an empty text node cannot be split).
     * </p>
     * If this node has no parent, the tail node is still created and returned, but it is not attached anywhere.
     * @param offset string offset point to split node at.
     * @return the newly created text node containing the text after the offset.
     */
    public TextNode splitText(int offset) {
        final String text = coreValue();
        // The messages describe exactly what is enforced below: 0 <= offset < text.length().
        Validate.isTrue(offset >= 0, "Split offset must not be negative");
        Validate.isTrue(offset < text.length(), "Split offset must be less than current text length");

        String head = text.substring(0, offset);
        String tail = text.substring(offset);
        text(head);
        TextNode tailNode = new TextNode(tail);
        if (parent() != null) {
            // insert the tail directly after this node in the parent's child list
            parent().addChildren(siblingIndex() + 1, tailNode);
        }

        return tailNode;
    }

    /**
     Write this node's text to the accumulator, escaped via {@code Entities.escape}.
     <p>
     When pretty-printing, an indent is emitted first if this node is not blank and either (a) it is at sibling index 0
     of an {@link Element} parent whose tag is formatted as a block, or (b) outline mode is on and
     {@code siblingNodes()} is not empty.
     </p>

     @param accum destination to append to
     @param depth current depth, passed through to {@code indent}
     @param out output settings controlling pretty-printing, outline mode and escaping
     @throws IOException if appending to the accumulator fails
     */
    void outerHtmlHead(Appendable accum, int depth, Document.OutputSettings out) throws IOException {
        if (out.prettyPrint()
                && ((siblingIndex() == 0
                        && parentNode instanceof Element
                        && ((Element) parentNode).tag().formatAsBlock()
                        && !isBlank())
                    || (out.outline() && siblingNodes().size() > 0 && !isBlank()))) {
            indent(accum, depth, out);
        }

        // Only collapse whitespace when pretty-printing inside an element that does not preserve it.
        boolean normaliseWhite = out.prettyPrint() && parent() instanceof Element
                && !Element.preserveWhitespace(parent());
        // NOTE(review): the two literal false flags are passed unchanged; their meaning is defined by
        // Entities.escape, which is not visible here.
        Entities.escape(accum, coreValue(), out, false, normaliseWhite, false);
    }

    /**
     Intentionally a no-op: this implementation writes nothing after the text.

     @param accum unused
     @param depth unused
     @param out unused
     */
    void outerHtmlTail(Appendable accum, int depth, Document.OutputSettings out) {}

    /**
     Get the string form of this node.
     @return the result of {@code outerHtml()}
     */
    @Override
    public String toString() {
        return outerHtml();
    }

    /**
     * Create a new TextNode from HTML encoded (aka escaped) data.
     * @param encodedText Text containing encoded HTML (e.g. {@code &lt;})
     * @param baseUri Base uri - ignored
     * @return TextNode containing unencoded data (e.g. {@code <})
     * @deprecated use {@link TextNode#createFromEncoded(String)} instead, as LeafNodes don't carry base URIs.
     */
    @Deprecated
    public static TextNode createFromEncoded(String encodedText, String baseUri) {
        return createFromEncoded(encodedText);
    }

    /**
     * Create a new TextNode from HTML encoded (aka escaped) data.
     * @param encodedText Text containing encoded HTML (e.g. {@code &lt;})
     * @return TextNode containing unencoded data (e.g. {@code <})
     */
    public static TextNode createFromEncoded(String encodedText) {
        String text = Entities.unescape(encodedText);
        return new TextNode(text);
    }

    /**
     Normalise the whitespace in the supplied text by delegating to {@link StringUtil#normaliseWhitespace(String)}.
     @param text text to normalise
     @return the normalised text
     */
    static String normaliseWhitespace(String text) {
        return StringUtil.normaliseWhitespace(text);
    }

    /**
     Remove any run of leading whitespace (characters matching the regex class {@code \s}) from the text.
     @param text text to strip
     @return the text without leading whitespace
     */
    static String stripLeadingWhitespace(String text) {
        return text.replaceFirst("^\\s+", "");
    }

    /**
     Test if the last character of the builder is a space. Only the plain space character ({@code ' '}) is checked,
     not other whitespace characters.
     @param sb builder to inspect
     @return true if the builder is non-empty and ends with a space
     */
    static boolean lastCharIsWhitespace(StringBuilder sb) {
        return sb.length() != 0 && sb.charAt(sb.length() - 1) == ' ';
    }


}