001package org.jsoup.parser;
002
003import org.jsoup.helper.Validate;
004
005import java.util.HashMap;
006import java.util.Map;
007
008/**
009 * HTML Tag capabilities.
010 *
011 * @author Jonathan Hedley, jonathan@hedley.net
012 */
013public class Tag {
014    private static final Map<String, Tag> tags = new HashMap<>(); // map of known tags
015
016    private String tagName;
017    private boolean isBlock = true; // block or inline
018    private boolean formatAsBlock = true; // should be formatted as a block
019    private boolean canContainInline = true; // only pcdata if not
020    private boolean empty = false; // can hold nothing; e.g. img
021    private boolean selfClosing = false; // can self close (<foo />). used for unknown tags that self close, without forcing them as empty.
022    private boolean preserveWhitespace = false; // for pre, textarea, script etc
023    private boolean formList = false; // a control that appears in forms: input, textarea, output etc
024    private boolean formSubmit = false; // a control that can be submitted in a form: input etc
025
026    private Tag(String tagName) {
027        this.tagName = tagName;
028    }
029
030    /**
031     * Get this tag's name.
032     *
033     * @return the tag's name
034     */
035    public String getName() {
036        return tagName;
037    }
038
039    /**
040     * Get a Tag by name. If not previously defined (unknown), returns a new generic tag, that can do anything.
041     * <p>
042     * Pre-defined tags (P, DIV etc) will be ==, but unknown tags are not registered and will only .equals().
043     * </p>
044     * 
045     * @param tagName Name of tag, e.g. "p". Case insensitive.
046     * @param settings used to control tag name sensitivity
047     * @return The tag, either defined or new generic.
048     */
049    public static Tag valueOf(String tagName, ParseSettings settings) {
050        Validate.notNull(tagName);
051        Tag tag = tags.get(tagName);
052
053        if (tag == null) {
054            tagName = settings.normalizeTag(tagName);
055            Validate.notEmpty(tagName);
056            tag = tags.get(tagName);
057
058            if (tag == null) {
059                // not defined: create default; go anywhere, do anything! (incl be inside a <p>)
060                tag = new Tag(tagName);
061                tag.isBlock = false;
062            }
063        }
064        return tag;
065    }
066
067    /**
068     * Get a Tag by name. If not previously defined (unknown), returns a new generic tag, that can do anything.
069     * <p>
070     * Pre-defined tags (P, DIV etc) will be ==, but unknown tags are not registered and will only .equals().
071     * </p>
072     *
073     * @param tagName Name of tag, e.g. "p". <b>Case sensitive</b>.
074     * @return The tag, either defined or new generic.
075     */
076    public static Tag valueOf(String tagName) {
077        return valueOf(tagName, ParseSettings.preserveCase);
078    }
079
080    /**
081     * Gets if this is a block tag.
082     *
083     * @return if block tag
084     */
085    public boolean isBlock() {
086        return isBlock;
087    }
088
089    /**
090     * Gets if this tag should be formatted as a block (or as inline)
091     *
092     * @return if should be formatted as block or inline
093     */
094    public boolean formatAsBlock() {
095        return formatAsBlock;
096    }
097
098    /**
099     * Gets if this tag can contain block tags.
100     *
101     * @return if tag can contain block tags
102     * @deprecated No longer used, and no different result than {{@link #isBlock()}}
103     */
104    public boolean canContainBlock() {
105        return isBlock;
106    }
107
108    /**
109     * Gets if this tag is an inline tag.
110     *
111     * @return if this tag is an inline tag.
112     */
113    public boolean isInline() {
114        return !isBlock;
115    }
116
117    /**
118     * Gets if this tag is a data only tag.
119     *
120     * @return if this tag is a data only tag
121     */
122    public boolean isData() {
123        return !canContainInline && !isEmpty();
124    }
125
126    /**
127     * Get if this is an empty tag
128     *
129     * @return if this is an empty tag
130     */
131    public boolean isEmpty() {
132        return empty;
133    }
134
135    /**
136     * Get if this tag is self closing.
137     *
138     * @return if this tag should be output as self closing.
139     */
140    public boolean isSelfClosing() {
141        return empty || selfClosing;
142    }
143
144    /**
145     * Get if this is a pre-defined tag, or was auto created on parsing.
146     *
147     * @return if a known tag
148     */
149    public boolean isKnownTag() {
150        return tags.containsKey(tagName);
151    }
152
153    /**
154     * Check if this tagname is a known tag.
155     *
156     * @param tagName name of tag
157     * @return if known HTML tag
158     */
159    public static boolean isKnownTag(String tagName) {
160        return tags.containsKey(tagName);
161    }
162
163    /**
164     * Get if this tag should preserve whitespace within child text nodes.
165     *
166     * @return if preserve whitespace
167     */
168    public boolean preserveWhitespace() {
169        return preserveWhitespace;
170    }
171
172    /**
173     * Get if this tag represents a control associated with a form. E.g. input, textarea, output
174     * @return if associated with a form
175     */
176    public boolean isFormListed() {
177        return formList;
178    }
179
180    /**
181     * Get if this tag represents an element that should be submitted with a form. E.g. input, option
182     * @return if submittable with a form
183     */
184    public boolean isFormSubmittable() {
185        return formSubmit;
186    }
187
188    Tag setSelfClosing() {
189        selfClosing = true;
190        return this;
191    }
192
193    @Override
194    public boolean equals(Object o) {
195        if (this == o) return true;
196        if (!(o instanceof Tag)) return false;
197
198        Tag tag = (Tag) o;
199
200        if (!tagName.equals(tag.tagName)) return false;
201        if (canContainInline != tag.canContainInline) return false;
202        if (empty != tag.empty) return false;
203        if (formatAsBlock != tag.formatAsBlock) return false;
204        if (isBlock != tag.isBlock) return false;
205        if (preserveWhitespace != tag.preserveWhitespace) return false;
206        if (selfClosing != tag.selfClosing) return false;
207        if (formList != tag.formList) return false;
208        return formSubmit == tag.formSubmit;
209    }
210
211    @Override
212    public int hashCode() {
213        int result = tagName.hashCode();
214        result = 31 * result + (isBlock ? 1 : 0);
215        result = 31 * result + (formatAsBlock ? 1 : 0);
216        result = 31 * result + (canContainInline ? 1 : 0);
217        result = 31 * result + (empty ? 1 : 0);
218        result = 31 * result + (selfClosing ? 1 : 0);
219        result = 31 * result + (preserveWhitespace ? 1 : 0);
220        result = 31 * result + (formList ? 1 : 0);
221        result = 31 * result + (formSubmit ? 1 : 0);
222        return result;
223    }
224
225    @Override
226    public String toString() {
227        return tagName;
228    }
229
230    // internal static initialisers:
231    // prepped from http://www.w3.org/TR/REC-html40/sgml/dtd.html and other sources
232    private static final String[] blockTags = {
233            "html", "head", "body", "frameset", "script", "noscript", "style", "meta", "link", "title", "frame",
234            "noframes", "section", "nav", "aside", "hgroup", "header", "footer", "p", "h1", "h2", "h3", "h4", "h5", "h6",
235            "ul", "ol", "pre", "div", "blockquote", "hr", "address", "figure", "figcaption", "form", "fieldset", "ins",
236            "del", "dl", "dt", "dd", "li", "table", "caption", "thead", "tfoot", "tbody", "colgroup", "col", "tr", "th",
237            "td", "video", "audio", "canvas", "details", "menu", "plaintext", "template", "article", "main",
238            "svg", "math"
239    };
240    private static final String[] inlineTags = {
241            "object", "base", "font", "tt", "i", "b", "u", "big", "small", "em", "strong", "dfn", "code", "samp", "kbd",
242            "var", "cite", "abbr", "time", "acronym", "mark", "ruby", "rt", "rp", "a", "img", "br", "wbr", "map", "q",
243            "sub", "sup", "bdo", "iframe", "embed", "span", "input", "select", "textarea", "label", "button", "optgroup",
244            "option", "legend", "datalist", "keygen", "output", "progress", "meter", "area", "param", "source", "track",
245            "summary", "command", "device", "area", "basefont", "bgsound", "menuitem", "param", "source", "track",
246            "data", "bdi", "s"
247    };
248    private static final String[] emptyTags = {
249            "meta", "link", "base", "frame", "img", "br", "wbr", "embed", "hr", "input", "keygen", "col", "command",
250            "device", "area", "basefont", "bgsound", "menuitem", "param", "source", "track"
251    };
252    private static final String[] formatAsInlineTags = {
253            "title", "a", "p", "h1", "h2", "h3", "h4", "h5", "h6", "pre", "address", "li", "th", "td", "script", "style",
254            "ins", "del", "s"
255    };
256    private static final String[] preserveWhitespaceTags = {
257            "pre", "plaintext", "title", "textarea"
258            // script is not here as it is a data node, which always preserve whitespace
259    };
260    // todo: I think we just need submit tags, and can scrub listed
261    private static final String[] formListedTags = {
262            "button", "fieldset", "input", "keygen", "object", "output", "select", "textarea"
263    };
264    private static final String[] formSubmitTags = {
265            "input", "keygen", "object", "select", "textarea"
266    };
267
268    static {
269        // creates
270        for (String tagName : blockTags) {
271            Tag tag = new Tag(tagName);
272            register(tag);
273        }
274        for (String tagName : inlineTags) {
275            Tag tag = new Tag(tagName);
276            tag.isBlock = false;
277            tag.formatAsBlock = false;
278            register(tag);
279        }
280
281        // mods:
282        for (String tagName : emptyTags) {
283            Tag tag = tags.get(tagName);
284            Validate.notNull(tag);
285            tag.canContainInline = false;
286            tag.empty = true;
287        }
288
289        for (String tagName : formatAsInlineTags) {
290            Tag tag = tags.get(tagName);
291            Validate.notNull(tag);
292            tag.formatAsBlock = false;
293        }
294
295        for (String tagName : preserveWhitespaceTags) {
296            Tag tag = tags.get(tagName);
297            Validate.notNull(tag);
298            tag.preserveWhitespace = true;
299        }
300
301        for (String tagName : formListedTags) {
302            Tag tag = tags.get(tagName);
303            Validate.notNull(tag);
304            tag.formList = true;
305        }
306
307        for (String tagName : formSubmitTags) {
308            Tag tag = tags.get(tagName);
309            Validate.notNull(tag);
310            tag.formSubmit = true;
311        }
312    }
313
314    private static void register(Tag tag) {
315        tags.put(tag.tagName, tag);
316    }
317}