001package org.jsoup.parser;
002
003import org.jsoup.helper.Validate;
004import org.jsoup.nodes.Attributes;
005
006import static org.jsoup.internal.Normalizer.lowerCase;
007
008/**
009 * Parse tokens for the Tokeniser.
010 */
011abstract class Token {
012    TokenType type;
013
014    private Token() {
015    }
016    
017    String tokenType() {
018        return this.getClass().getSimpleName();
019    }
020
021    /**
022     * Reset the data represent by this token, for reuse. Prevents the need to create transfer objects for every
023     * piece of data, which immediately get GCed.
024     */
025    abstract Token reset();
026
027    static void reset(StringBuilder sb) {
028        if (sb != null) {
029            sb.delete(0, sb.length());
030        }
031    }
032
033    static final class Doctype extends Token {
034        final StringBuilder name = new StringBuilder();
035        String pubSysKey = null;
036        final StringBuilder publicIdentifier = new StringBuilder();
037        final StringBuilder systemIdentifier = new StringBuilder();
038        boolean forceQuirks = false;
039
040        Doctype() {
041            type = TokenType.Doctype;
042        }
043
044        @Override
045        Token reset() {
046            reset(name);
047            pubSysKey = null;
048            reset(publicIdentifier);
049            reset(systemIdentifier);
050            forceQuirks = false;
051            return this;
052        }
053
054        String getName() {
055            return name.toString();
056        }
057
058        String getPubSysKey() {
059            return pubSysKey;
060        }
061
062        String getPublicIdentifier() {
063            return publicIdentifier.toString();
064        }
065
066        public String getSystemIdentifier() {
067            return systemIdentifier.toString();
068        }
069
070        public boolean isForceQuirks() {
071            return forceQuirks;
072        }
073    }
074
075    static abstract class Tag extends Token {
076        protected String tagName;
077        protected String normalName; // lc version of tag name, for case insensitive tree build
078        private String pendingAttributeName; // attribute names are generally caught in one hop, not accumulated
079        private StringBuilder pendingAttributeValue = new StringBuilder(); // but values are accumulated, from e.g. & in hrefs
080        private String pendingAttributeValueS; // try to get attr vals in one shot, vs Builder
081        private boolean hasEmptyAttributeValue = false; // distinguish boolean attribute from empty string value
082        private boolean hasPendingAttributeValue = false;
083        boolean selfClosing = false;
084        Attributes attributes; // start tags get attributes on construction. End tags get attributes on first new attribute (but only for parser convenience, not used).
085
086        @Override
087        Tag reset() {
088            tagName = null;
089            normalName = null;
090            pendingAttributeName = null;
091            reset(pendingAttributeValue);
092            pendingAttributeValueS = null;
093            hasEmptyAttributeValue = false;
094            hasPendingAttributeValue = false;
095            selfClosing = false;
096            attributes = null;
097            return this;
098        }
099
100        final void newAttribute() {
101            if (attributes == null)
102                attributes = new Attributes();
103
104            if (pendingAttributeName != null) {
105                // the tokeniser has skipped whitespace control chars, but trimming could collapse to empty for other control codes, so verify here
106                pendingAttributeName = pendingAttributeName.trim();
107                if (pendingAttributeName.length() > 0) {
108                    String value;
109                    if (hasPendingAttributeValue)
110                        value = pendingAttributeValue.length() > 0 ? pendingAttributeValue.toString() : pendingAttributeValueS;
111                    else if (hasEmptyAttributeValue)
112                        value = "";
113                    else
114                        value = null;
115                    attributes.put(pendingAttributeName, value);
116                }
117            }
118            pendingAttributeName = null;
119            hasEmptyAttributeValue = false;
120            hasPendingAttributeValue = false;
121            reset(pendingAttributeValue);
122            pendingAttributeValueS = null;
123        }
124
125        final void finaliseTag() {
126            // finalises for emit
127            if (pendingAttributeName != null) {
128                // todo: check if attribute name exists; if so, drop and error
129                newAttribute();
130            }
131        }
132
133        final String name() { // preserves case, for input into Tag.valueOf (which may drop case)
134            Validate.isFalse(tagName == null || tagName.length() == 0);
135            return tagName;
136        }
137
138        final String normalName() { // loses case, used in tree building for working out where in tree it should go
139            return normalName;
140        }
141
142        final Tag name(String name) {
143            tagName = name;
144            normalName = lowerCase(name);
145            return this;
146        }
147
148        final boolean isSelfClosing() {
149            return selfClosing;
150        }
151
152        @SuppressWarnings({"TypeMayBeWeakened"})
153        final Attributes getAttributes() {
154            return attributes;
155        }
156
157        // these appenders are rarely hit in not null state-- caused by null chars.
158        final void appendTagName(String append) {
159            tagName = tagName == null ? append : tagName.concat(append);
160            normalName = lowerCase(tagName);
161        }
162
163        final void appendTagName(char append) {
164            appendTagName(String.valueOf(append));
165        }
166
167        final void appendAttributeName(String append) {
168            pendingAttributeName = pendingAttributeName == null ? append : pendingAttributeName.concat(append);
169        }
170
171        final void appendAttributeName(char append) {
172            appendAttributeName(String.valueOf(append));
173        }
174
175        final void appendAttributeValue(String append) {
176            ensureAttributeValue();
177            if (pendingAttributeValue.length() == 0) {
178                pendingAttributeValueS = append;
179            } else {
180                pendingAttributeValue.append(append);
181            }
182        }
183
184        final void appendAttributeValue(char append) {
185            ensureAttributeValue();
186            pendingAttributeValue.append(append);
187        }
188
189        final void appendAttributeValue(char[] append) {
190            ensureAttributeValue();
191            pendingAttributeValue.append(append);
192        }
193
194        final void appendAttributeValue(int[] appendCodepoints) {
195            ensureAttributeValue();
196            for (int codepoint : appendCodepoints) {
197                pendingAttributeValue.appendCodePoint(codepoint);
198            }
199        }
200        
201        final void setEmptyAttributeValue() {
202            hasEmptyAttributeValue = true;
203        }
204
205        private void ensureAttributeValue() {
206            hasPendingAttributeValue = true;
207            // if on second hit, we'll need to move to the builder
208            if (pendingAttributeValueS != null) {
209                pendingAttributeValue.append(pendingAttributeValueS);
210                pendingAttributeValueS = null;
211            }
212        }
213    }
214
215    final static class StartTag extends Tag {
216        StartTag() {
217            super();
218            attributes = new Attributes();
219            type = TokenType.StartTag;
220        }
221
222        @Override
223        Tag reset() {
224            super.reset();
225            attributes = new Attributes();
226            // todo - would prefer these to be null, but need to check Element assertions
227            return this;
228        }
229
230        StartTag nameAttr(String name, Attributes attributes) {
231            this.tagName = name;
232            this.attributes = attributes;
233            normalName = lowerCase(tagName);
234            return this;
235        }
236
237        @Override
238        public String toString() {
239            if (attributes != null && attributes.size() > 0)
240                return "<" + name() + " " + attributes.toString() + ">";
241            else
242                return "<" + name() + ">";
243        }
244    }
245
246    final static class EndTag extends Tag{
247        EndTag() {
248            super();
249            type = TokenType.EndTag;
250        }
251
252        @Override
253        public String toString() {
254            return "</" + name() + ">";
255        }
256    }
257
258    final static class Comment extends Token {
259        final StringBuilder data = new StringBuilder();
260        boolean bogus = false;
261
262        @Override
263        Token reset() {
264            reset(data);
265            bogus = false;
266            return this;
267        }
268
269        Comment() {
270            type = TokenType.Comment;
271        }
272
273        String getData() {
274            return data.toString();
275        }
276
277        @Override
278        public String toString() {
279            return "<!--" + getData() + "-->";
280        }
281    }
282
283    final static class Character extends Token {
284        private String data;
285
286        Character() {
287            super();
288            type = TokenType.Character;
289        }
290
291        @Override
292        Token reset() {
293            data = null;
294            return this;
295        }
296
297        Character data(String data) {
298            this.data = data;
299            return this;
300        }
301
302        String getData() {
303            return data;
304        }
305
306        @Override
307        public String toString() {
308            return getData();
309        }
310    }
311
312    final static class EOF extends Token {
313        EOF() {
314            type = Token.TokenType.EOF;
315        }
316
317        @Override
318        Token reset() {
319            return this;
320        }
321    }
322
323    final boolean isDoctype() {
324        return type == TokenType.Doctype;
325    }
326
327    final Doctype asDoctype() {
328        return (Doctype) this;
329    }
330
331    final boolean isStartTag() {
332        return type == TokenType.StartTag;
333    }
334
335    final StartTag asStartTag() {
336        return (StartTag) this;
337    }
338
339    final boolean isEndTag() {
340        return type == TokenType.EndTag;
341    }
342
343    final EndTag asEndTag() {
344        return (EndTag) this;
345    }
346
347    final boolean isComment() {
348        return type == TokenType.Comment;
349    }
350
351    final Comment asComment() {
352        return (Comment) this;
353    }
354
355    final boolean isCharacter() {
356        return type == TokenType.Character;
357    }
358
359    final Character asCharacter() {
360        return (Character) this;
361    }
362
363    final boolean isEOF() {
364        return type == TokenType.EOF;
365    }
366
367    enum TokenType {
368        Doctype,
369        StartTag,
370        EndTag,
371        Comment,
372        Character,
373        EOF
374    }
375}