001package org.jsoup.parser; 002 003import org.jsoup.helper.Validate; 004 005import java.util.HashMap; 006import java.util.Map; 007 008/** 009 * HTML Tag capabilities. 010 * 011 * @author Jonathan Hedley, jonathan@hedley.net 012 */ 013public class Tag { 014 private static final Map<String, Tag> tags = new HashMap<>(); // map of known tags 015 016 private String tagName; 017 private boolean isBlock = true; // block or inline 018 private boolean formatAsBlock = true; // should be formatted as a block 019 private boolean canContainInline = true; // only pcdata if not 020 private boolean empty = false; // can hold nothing; e.g. img 021 private boolean selfClosing = false; // can self close (<foo />). used for unknown tags that self close, without forcing them as empty. 022 private boolean preserveWhitespace = false; // for pre, textarea, script etc 023 private boolean formList = false; // a control that appears in forms: input, textarea, output etc 024 private boolean formSubmit = false; // a control that can be submitted in a form: input etc 025 026 private Tag(String tagName) { 027 this.tagName = tagName; 028 } 029 030 /** 031 * Get this tag's name. 032 * 033 * @return the tag's name 034 */ 035 public String getName() { 036 return tagName; 037 } 038 039 /** 040 * Get a Tag by name. If not previously defined (unknown), returns a new generic tag, that can do anything. 041 * <p> 042 * Pre-defined tags (P, DIV etc) will be ==, but unknown tags are not registered and will only .equals(). 043 * </p> 044 * 045 * @param tagName Name of tag, e.g. "p". Case insensitive. 046 * @param settings used to control tag name sensitivity 047 * @return The tag, either defined or new generic. 048 */ 049 public static Tag valueOf(String tagName, ParseSettings settings) { 050 Validate.notNull(tagName); 051 Tag tag = tags.get(tagName); 052 053 if (tag == null) { 054 tagName = settings.normalizeTag(tagName); 055 Validate.notEmpty(tagName); 056 tag = tags.get(tagName); 057 058 if (tag == null) { 059 // not defined: create default; go anywhere, do anything! (incl be inside a <p>) 060 tag = new Tag(tagName); 061 tag.isBlock = false; 062 } 063 } 064 return tag; 065 } 066 067 /** 068 * Get a Tag by name. If not previously defined (unknown), returns a new generic tag, that can do anything. 069 * <p> 070 * Pre-defined tags (P, DIV etc) will be ==, but unknown tags are not registered and will only .equals(). 071 * </p> 072 * 073 * @param tagName Name of tag, e.g. "p". <b>Case sensitive</b>. 074 * @return The tag, either defined or new generic. 075 */ 076 public static Tag valueOf(String tagName) { 077 return valueOf(tagName, ParseSettings.preserveCase); 078 } 079 080 /** 081 * Gets if this is a block tag. 082 * 083 * @return if block tag 084 */ 085 public boolean isBlock() { 086 return isBlock; 087 } 088 089 /** 090 * Gets if this tag should be formatted as a block (or as inline) 091 * 092 * @return if should be formatted as block or inline 093 */ 094 public boolean formatAsBlock() { 095 return formatAsBlock; 096 } 097 098 /** 099 * Gets if this tag can contain block tags. 100 * 101 * @return if tag can contain block tags 102 * @deprecated No longer used, and no different result than {{@link #isBlock()}} 103 */ 104 public boolean canContainBlock() { 105 return isBlock; 106 } 107 108 /** 109 * Gets if this tag is an inline tag. 110 * 111 * @return if this tag is an inline tag. 112 */ 113 public boolean isInline() { 114 return !isBlock; 115 } 116 117 /** 118 * Gets if this tag is a data only tag. 119 * 120 * @return if this tag is a data only tag 121 */ 122 public boolean isData() { 123 return !canContainInline && !isEmpty(); 124 } 125 126 /** 127 * Get if this is an empty tag 128 * 129 * @return if this is an empty tag 130 */ 131 public boolean isEmpty() { 132 return empty; 133 } 134 135 /** 136 * Get if this tag is self closing. 137 * 138 * @return if this tag should be output as self closing. 139 */ 140 public boolean isSelfClosing() { 141 return empty || selfClosing; 142 } 143 144 /** 145 * Get if this is a pre-defined tag, or was auto created on parsing. 146 * 147 * @return if a known tag 148 */ 149 public boolean isKnownTag() { 150 return tags.containsKey(tagName); 151 } 152 153 /** 154 * Check if this tagname is a known tag. 155 * 156 * @param tagName name of tag 157 * @return if known HTML tag 158 */ 159 public static boolean isKnownTag(String tagName) { 160 return tags.containsKey(tagName); 161 } 162 163 /** 164 * Get if this tag should preserve whitespace within child text nodes. 165 * 166 * @return if preserve whitespace 167 */ 168 public boolean preserveWhitespace() { 169 return preserveWhitespace; 170 } 171 172 /** 173 * Get if this tag represents a control associated with a form. E.g. input, textarea, output 174 * @return if associated with a form 175 */ 176 public boolean isFormListed() { 177 return formList; 178 } 179 180 /** 181 * Get if this tag represents an element that should be submitted with a form. E.g. input, option 182 * @return if submittable with a form 183 */ 184 public boolean isFormSubmittable() { 185 return formSubmit; 186 } 187 188 Tag setSelfClosing() { 189 selfClosing = true; 190 return this; 191 } 192 193 @Override 194 public boolean equals(Object o) { 195 if (this == o) return true; 196 if (!(o instanceof Tag)) return false; 197 198 Tag tag = (Tag) o; 199 200 if (!tagName.equals(tag.tagName)) return false; 201 if (canContainInline != tag.canContainInline) return false; 202 if (empty != tag.empty) return false; 203 if (formatAsBlock != tag.formatAsBlock) return false; 204 if (isBlock != tag.isBlock) return false; 205 if (preserveWhitespace != tag.preserveWhitespace) return false; 206 if (selfClosing != tag.selfClosing) return false; 207 if (formList != tag.formList) return false; 208 return formSubmit == tag.formSubmit; 209 } 210 211 @Override 212 public int hashCode() { 213 int result = tagName.hashCode(); 214 result = 31 * result + (isBlock ? 1 : 0); 215 result = 31 * result + (formatAsBlock ? 1 : 0); 216 result = 31 * result + (canContainInline ? 1 : 0); 217 result = 31 * result + (empty ? 1 : 0); 218 result = 31 * result + (selfClosing ? 1 : 0); 219 result = 31 * result + (preserveWhitespace ? 1 : 0); 220 result = 31 * result + (formList ? 1 : 0); 221 result = 31 * result + (formSubmit ? 1 : 0); 222 return result; 223 } 224 225 @Override 226 public String toString() { 227 return tagName; 228 } 229 230 // internal static initialisers: 231 // prepped from http://www.w3.org/TR/REC-html40/sgml/dtd.html and other sources 232 private static final String[] blockTags = { 233 "html", "head", "body", "frameset", "script", "noscript", "style", "meta", "link", "title", "frame", 234 "noframes", "section", "nav", "aside", "hgroup", "header", "footer", "p", "h1", "h2", "h3", "h4", "h5", "h6", 235 "ul", "ol", "pre", "div", "blockquote", "hr", "address", "figure", "figcaption", "form", "fieldset", "ins", 236 "del", "dl", "dt", "dd", "li", "table", "caption", "thead", "tfoot", "tbody", "colgroup", "col", "tr", "th", 237 "td", "video", "audio", "canvas", "details", "menu", "plaintext", "template", "article", "main", 238 "svg", "math" 239 }; 240 private static final String[] inlineTags = { 241 "object", "base", "font", "tt", "i", "b", "u", "big", "small", "em", "strong", "dfn", "code", "samp", "kbd", 242 "var", "cite", "abbr", "time", "acronym", "mark", "ruby", "rt", "rp", "a", "img", "br", "wbr", "map", "q", 243 "sub", "sup", "bdo", "iframe", "embed", "span", "input", "select", "textarea", "label", "button", "optgroup", 244 "option", "legend", "datalist", "keygen", "output", "progress", "meter", "area", "param", "source", "track", 245 "summary", "command", "device", "area", "basefont", "bgsound", "menuitem", "param", "source", "track", 246 "data", "bdi", "s" 247 }; 248 private static final String[] emptyTags = { 249 "meta", "link", "base", "frame", "img", "br", "wbr", "embed", "hr", "input", "keygen", "col", "command", 250 "device", "area", "basefont", "bgsound", "menuitem", "param", "source", "track" 251 }; 252 private static final String[] formatAsInlineTags = { 253 "title", "a", "p", "h1", "h2", "h3", "h4", "h5", "h6", "pre", "address", "li", "th", "td", "script", "style", 254 "ins", "del", "s" 255 }; 256 private static final String[] preserveWhitespaceTags = { 257 "pre", "plaintext", "title", "textarea" 258 // script is not here as it is a data node, which always preserve whitespace 259 }; 260 // todo: I think we just need submit tags, and can scrub listed 261 private static final String[] formListedTags = { 262 "button", "fieldset", "input", "keygen", "object", "output", "select", "textarea" 263 }; 264 private static final String[] formSubmitTags = { 265 "input", "keygen", "object", "select", "textarea" 266 }; 267 268 static { 269 // creates 270 for (String tagName : blockTags) { 271 Tag tag = new Tag(tagName); 272 register(tag); 273 } 274 for (String tagName : inlineTags) { 275 Tag tag = new Tag(tagName); 276 tag.isBlock = false; 277 tag.formatAsBlock = false; 278 register(tag); 279 } 280 281 // mods: 282 for (String tagName : emptyTags) { 283 Tag tag = tags.get(tagName); 284 Validate.notNull(tag); 285 tag.canContainInline = false; 286 tag.empty = true; 287 } 288 289 for (String tagName : formatAsInlineTags) { 290 Tag tag = tags.get(tagName); 291 Validate.notNull(tag); 292 tag.formatAsBlock = false; 293 } 294 295 for (String tagName : preserveWhitespaceTags) { 296 Tag tag = tags.get(tagName); 297 Validate.notNull(tag); 298 tag.preserveWhitespace = true; 299 } 300 301 for (String tagName : formListedTags) { 302 Tag tag = tags.get(tagName); 303 Validate.notNull(tag); 304 tag.formList = true; 305 } 306 307 for (String tagName : formSubmitTags) { 308 Tag tag = tags.get(tagName); 309 Validate.notNull(tag); 310 tag.formSubmit = true; 311 } 312 } 313 314 private static void register(Tag tag) { 315 tags.put(tag.tagName, tag); 316 } 317}