001package org.jsoup.parser;
002
003import org.jsoup.helper.StringUtil;
004import org.jsoup.nodes.Attribute;
005import org.jsoup.nodes.Attributes;
006import org.jsoup.nodes.Document;
007import org.jsoup.nodes.DocumentType;
008import org.jsoup.nodes.Element;
009import org.jsoup.nodes.Node;
010
011import java.util.ArrayList;
012
013/**
014 * The Tree Builder's current state. Each state embodies the processing for the state, and transitions to other states.
015 */
016enum HtmlTreeBuilderState {
017    Initial {
018        boolean process(Token t, HtmlTreeBuilder tb) {
019            if (isWhitespace(t)) {
020                return true; // ignore whitespace
021            } else if (t.isComment()) {
022                tb.insert(t.asComment());
023            } else if (t.isDoctype()) {
024                // todo: parse error check on expected doctypes
025                // todo: quirk state check on doctype ids
026                Token.Doctype d = t.asDoctype();
027                DocumentType doctype = new DocumentType(
028                    tb.settings.normalizeTag(d.getName()), d.getPublicIdentifier(), d.getSystemIdentifier());
029                doctype.setPubSysKey(d.getPubSysKey());
030                tb.getDocument().appendChild(doctype);
031                if (d.isForceQuirks())
032                    tb.getDocument().quirksMode(Document.QuirksMode.quirks);
033                tb.transition(BeforeHtml);
034            } else {
035                // todo: check not iframe srcdoc
036                tb.transition(BeforeHtml);
037                return tb.process(t); // re-process token
038            }
039            return true;
040        }
041    },
042    BeforeHtml {
043        boolean process(Token t, HtmlTreeBuilder tb) {
044            if (t.isDoctype()) {
045                tb.error(this);
046                return false;
047            } else if (t.isComment()) {
048                tb.insert(t.asComment());
049            } else if (isWhitespace(t)) {
050                return true; // ignore whitespace
051            } else if (t.isStartTag() && t.asStartTag().normalName().equals("html")) {
052                tb.insert(t.asStartTag());
053                tb.transition(BeforeHead);
054            } else if (t.isEndTag() && (StringUtil.in(t.asEndTag().normalName(), "head", "body", "html", "br"))) {
055                return anythingElse(t, tb);
056            } else if (t.isEndTag()) {
057                tb.error(this);
058                return false;
059            } else {
060                return anythingElse(t, tb);
061            }
062            return true;
063        }
064
065        private boolean anythingElse(Token t, HtmlTreeBuilder tb) {
066            tb.insertStartTag("html");
067            tb.transition(BeforeHead);
068            return tb.process(t);
069        }
070    },
071    BeforeHead {
072        boolean process(Token t, HtmlTreeBuilder tb) {
073            if (isWhitespace(t)) {
074                return true;
075            } else if (t.isComment()) {
076                tb.insert(t.asComment());
077            } else if (t.isDoctype()) {
078                tb.error(this);
079                return false;
080            } else if (t.isStartTag() && t.asStartTag().normalName().equals("html")) {
081                return InBody.process(t, tb); // does not transition
082            } else if (t.isStartTag() && t.asStartTag().normalName().equals("head")) {
083                Element head = tb.insert(t.asStartTag());
084                tb.setHeadElement(head);
085                tb.transition(InHead);
086            } else if (t.isEndTag() && (StringUtil.in(t.asEndTag().normalName(), "head", "body", "html", "br"))) {
087                tb.processStartTag("head");
088                return tb.process(t);
089            } else if (t.isEndTag()) {
090                tb.error(this);
091                return false;
092            } else {
093                tb.processStartTag("head");
094                return tb.process(t);
095            }
096            return true;
097        }
098    },
099    InHead {
100        boolean process(Token t, HtmlTreeBuilder tb) {
101            if (isWhitespace(t)) {
102                tb.insert(t.asCharacter());
103                return true;
104            }
105            switch (t.type) {
106                case Comment:
107                    tb.insert(t.asComment());
108                    break;
109                case Doctype:
110                    tb.error(this);
111                    return false;
112                case StartTag:
113                    Token.StartTag start = t.asStartTag();
114                    String name = start.normalName();
115                    if (name.equals("html")) {
116                        return InBody.process(t, tb);
117                    } else if (StringUtil.in(name, "base", "basefont", "bgsound", "command", "link")) {
118                        Element el = tb.insertEmpty(start);
119                        // jsoup special: update base the frist time it is seen
120                        if (name.equals("base") && el.hasAttr("href"))
121                            tb.maybeSetBaseUri(el);
122                    } else if (name.equals("meta")) {
123                        Element meta = tb.insertEmpty(start);
124                        // todo: charset switches
125                    } else if (name.equals("title")) {
126                        handleRcData(start, tb);
127                    } else if (StringUtil.in(name, "noframes", "style")) {
128                        handleRawtext(start, tb);
129                    } else if (name.equals("noscript")) {
130                        // else if noscript && scripting flag = true: rawtext (jsoup doesn't run script, to handle as noscript)
131                        tb.insert(start);
132                        tb.transition(InHeadNoscript);
133                    } else if (name.equals("script")) {
134                        // skips some script rules as won't execute them
135
136                        tb.tokeniser.transition(TokeniserState.ScriptData);
137                        tb.markInsertionMode();
138                        tb.transition(Text);
139                        tb.insert(start);
140                    } else if (name.equals("head")) {
141                        tb.error(this);
142                        return false;
143                    } else {
144                        return anythingElse(t, tb);
145                    }
146                    break;
147                case EndTag:
148                    Token.EndTag end = t.asEndTag();
149                    name = end.normalName();
150                    if (name.equals("head")) {
151                        tb.pop();
152                        tb.transition(AfterHead);
153                    } else if (StringUtil.in(name, "body", "html", "br")) {
154                        return anythingElse(t, tb);
155                    } else {
156                        tb.error(this);
157                        return false;
158                    }
159                    break;
160                default:
161                    return anythingElse(t, tb);
162            }
163            return true;
164        }
165
166        private boolean anythingElse(Token t, TreeBuilder tb) {
167            tb.processEndTag("head");
168            return tb.process(t);
169        }
170    },
171    InHeadNoscript {
172        boolean process(Token t, HtmlTreeBuilder tb) {
173            if (t.isDoctype()) {
174                tb.error(this);
175            } else if (t.isStartTag() && t.asStartTag().normalName().equals("html")) {
176                return tb.process(t, InBody);
177            } else if (t.isEndTag() && t.asEndTag().normalName().equals("noscript")) {
178                tb.pop();
179                tb.transition(InHead);
180            } else if (isWhitespace(t) || t.isComment() || (t.isStartTag() && StringUtil.in(t.asStartTag().normalName(),
181                    "basefont", "bgsound", "link", "meta", "noframes", "style"))) {
182                return tb.process(t, InHead);
183            } else if (t.isEndTag() && t.asEndTag().normalName().equals("br")) {
184                return anythingElse(t, tb);
185            } else if ((t.isStartTag() && StringUtil.in(t.asStartTag().normalName(), "head", "noscript")) || t.isEndTag()) {
186                tb.error(this);
187                return false;
188            } else {
189                return anythingElse(t, tb);
190            }
191            return true;
192        }
193
194        private boolean anythingElse(Token t, HtmlTreeBuilder tb) {
195            tb.error(this);
196            tb.insert(new Token.Character().data(t.toString()));
197            return true;
198        }
199    },
200    AfterHead {
201        boolean process(Token t, HtmlTreeBuilder tb) {
202            if (isWhitespace(t)) {
203                tb.insert(t.asCharacter());
204            } else if (t.isComment()) {
205                tb.insert(t.asComment());
206            } else if (t.isDoctype()) {
207                tb.error(this);
208            } else if (t.isStartTag()) {
209                Token.StartTag startTag = t.asStartTag();
210                String name = startTag.normalName();
211                if (name.equals("html")) {
212                    return tb.process(t, InBody);
213                } else if (name.equals("body")) {
214                    tb.insert(startTag);
215                    tb.framesetOk(false);
216                    tb.transition(InBody);
217                } else if (name.equals("frameset")) {
218                    tb.insert(startTag);
219                    tb.transition(InFrameset);
220                } else if (StringUtil.in(name, "base", "basefont", "bgsound", "link", "meta", "noframes", "script", "style", "title")) {
221                    tb.error(this);
222                    Element head = tb.getHeadElement();
223                    tb.push(head);
224                    tb.process(t, InHead);
225                    tb.removeFromStack(head);
226                } else if (name.equals("head")) {
227                    tb.error(this);
228                    return false;
229                } else {
230                    anythingElse(t, tb);
231                }
232            } else if (t.isEndTag()) {
233                if (StringUtil.in(t.asEndTag().normalName(), "body", "html")) {
234                    anythingElse(t, tb);
235                } else {
236                    tb.error(this);
237                    return false;
238                }
239            } else {
240                anythingElse(t, tb);
241            }
242            return true;
243        }
244
245        private boolean anythingElse(Token t, HtmlTreeBuilder tb) {
246            tb.processStartTag("body");
247            tb.framesetOk(true);
248            return tb.process(t);
249        }
250    },
251    InBody {
252        boolean process(Token t, HtmlTreeBuilder tb) {
253            switch (t.type) {
254                case Character: {
255                    Token.Character c = t.asCharacter();
256                    if (c.getData().equals(nullString)) {
257                        // todo confirm that check
258                        tb.error(this);
259                        return false;
260                    } else if (tb.framesetOk() && isWhitespace(c)) { // don't check if whitespace if frames already closed
261                        tb.reconstructFormattingElements();
262                        tb.insert(c);
263                    } else {
264                        tb.reconstructFormattingElements();
265                        tb.insert(c);
266                        tb.framesetOk(false);
267                    }
268                    break;
269                }
270                case Comment: {
271                    tb.insert(t.asComment());
272                    break;
273                }
274                case Doctype: {
275                    tb.error(this);
276                    return false;
277                }
278                case StartTag:
279                    Token.StartTag startTag = t.asStartTag();
280                    // todo - refactor to a switch statement
281                    String name = startTag.normalName();
282                    if (name.equals("a")) {
283                        if (tb.getActiveFormattingElement("a") != null) {
284                            tb.error(this);
285                            tb.processEndTag("a");
286
287                            // still on stack?
288                            Element remainingA = tb.getFromStack("a");
289                            if (remainingA != null) {
290                                tb.removeFromActiveFormattingElements(remainingA);
291                                tb.removeFromStack(remainingA);
292                            }
293                        }
294                        tb.reconstructFormattingElements();
295                        Element a = tb.insert(startTag);
296                        tb.pushActiveFormattingElements(a);
297                    } else if (StringUtil.inSorted(name, Constants.InBodyStartEmptyFormatters)) {
298                        tb.reconstructFormattingElements();
299                        tb.insertEmpty(startTag);
300                        tb.framesetOk(false);
301                    } else if (StringUtil.inSorted(name, Constants.InBodyStartPClosers)) {
302                        if (tb.inButtonScope("p")) {
303                            tb.processEndTag("p");
304                        }
305                        tb.insert(startTag);
306                    } else if (name.equals("span")) {
307                        // same as final else, but short circuits lots of checks
308                        tb.reconstructFormattingElements();
309                        tb.insert(startTag);
310                    } else if (name.equals("li")) {
311                        tb.framesetOk(false);
312                        ArrayList<Element> stack = tb.getStack();
313                        for (int i = stack.size() - 1; i > 0; i--) {
314                            Element el = stack.get(i);
315                            if (el.nodeName().equals("li")) {
316                                tb.processEndTag("li");
317                                break;
318                            }
319                            if (tb.isSpecial(el) && !StringUtil.inSorted(el.nodeName(), Constants.InBodyStartLiBreakers))
320                                break;
321                        }
322                        if (tb.inButtonScope("p")) {
323                            tb.processEndTag("p");
324                        }
325                        tb.insert(startTag);
326                    } else if (name.equals("html")) {
327                        tb.error(this);
328                        // merge attributes onto real html
329                        Element html = tb.getStack().get(0);
330                        for (Attribute attribute : startTag.getAttributes()) {
331                            if (!html.hasAttr(attribute.getKey()))
332                                html.attributes().put(attribute);
333                        }
334                    } else if (StringUtil.inSorted(name, Constants.InBodyStartToHead)) {
335                        return tb.process(t, InHead);
336                    } else if (name.equals("body")) {
337                        tb.error(this);
338                        ArrayList<Element> stack = tb.getStack();
339                        if (stack.size() == 1 || (stack.size() > 2 && !stack.get(1).nodeName().equals("body"))) {
340                            // only in fragment case
341                            return false; // ignore
342                        } else {
343                            tb.framesetOk(false);
344                            Element body = stack.get(1);
345                            for (Attribute attribute : startTag.getAttributes()) {
346                                if (!body.hasAttr(attribute.getKey()))
347                                    body.attributes().put(attribute);
348                            }
349                        }
350                    } else if (name.equals("frameset")) {
351                        tb.error(this);
352                        ArrayList<Element> stack = tb.getStack();
353                        if (stack.size() == 1 || (stack.size() > 2 && !stack.get(1).nodeName().equals("body"))) {
354                            // only in fragment case
355                            return false; // ignore
356                        } else if (!tb.framesetOk()) {
357                            return false; // ignore frameset
358                        } else {
359                            Element second = stack.get(1);
360                            if (second.parent() != null)
361                                second.remove();
362                            // pop up to html element
363                            while (stack.size() > 1)
364                                stack.remove(stack.size()-1);
365                            tb.insert(startTag);
366                            tb.transition(InFrameset);
367                        }
368                    } else if (StringUtil.inSorted(name, Constants.Headings)) {
369                        if (tb.inButtonScope("p")) {
370                            tb.processEndTag("p");
371                        }
372                        if (StringUtil.inSorted(tb.currentElement().nodeName(), Constants.Headings)) {
373                            tb.error(this);
374                            tb.pop();
375                        }
376                        tb.insert(startTag);
377                    } else if (StringUtil.inSorted(name, Constants.InBodyStartPreListing)) {
378                        if (tb.inButtonScope("p")) {
379                            tb.processEndTag("p");
380                        }
381                        tb.insert(startTag);
382                        // todo: ignore LF if next token
383                        tb.framesetOk(false);
384                    } else if (name.equals("form")) {
385                        if (tb.getFormElement() != null) {
386                            tb.error(this);
387                            return false;
388                        }
389                        if (tb.inButtonScope("p")) {
390                            tb.processEndTag("p");
391                        }
392                        tb.insertForm(startTag, true);
393                    } else if (StringUtil.inSorted(name, Constants.DdDt)) {
394                        tb.framesetOk(false);
395                        ArrayList<Element> stack = tb.getStack();
396                        for (int i = stack.size() - 1; i > 0; i--) {
397                            Element el = stack.get(i);
398                            if (StringUtil.inSorted(el.nodeName(), Constants.DdDt)) {
399                                tb.processEndTag(el.nodeName());
400                                break;
401                            }
402                            if (tb.isSpecial(el) && !StringUtil.inSorted(el.nodeName(), Constants.InBodyStartLiBreakers))
403                                break;
404                        }
405                        if (tb.inButtonScope("p")) {
406                            tb.processEndTag("p");
407                        }
408                        tb.insert(startTag);
409                    } else if (name.equals("plaintext")) {
410                        if (tb.inButtonScope("p")) {
411                            tb.processEndTag("p");
412                        }
413                        tb.insert(startTag);
414                        tb.tokeniser.transition(TokeniserState.PLAINTEXT); // once in, never gets out
415                    } else if (name.equals("button")) {
416                        if (tb.inButtonScope("button")) {
417                            // close and reprocess
418                            tb.error(this);
419                            tb.processEndTag("button");
420                            tb.process(startTag);
421                        } else {
422                            tb.reconstructFormattingElements();
423                            tb.insert(startTag);
424                            tb.framesetOk(false);
425                        }
426                    } else if (StringUtil.inSorted(name, Constants.Formatters)) {
427                        tb.reconstructFormattingElements();
428                        Element el = tb.insert(startTag);
429                        tb.pushActiveFormattingElements(el);
430                    } else if (name.equals("nobr")) {
431                        tb.reconstructFormattingElements();
432                        if (tb.inScope("nobr")) {
433                            tb.error(this);
434                            tb.processEndTag("nobr");
435                            tb.reconstructFormattingElements();
436                        }
437                        Element el = tb.insert(startTag);
438                        tb.pushActiveFormattingElements(el);
439                    } else if (StringUtil.inSorted(name, Constants.InBodyStartApplets)) {
440                        tb.reconstructFormattingElements();
441                        tb.insert(startTag);
442                        tb.insertMarkerToFormattingElements();
443                        tb.framesetOk(false);
444                    } else if (name.equals("table")) {
445                        if (tb.getDocument().quirksMode() != Document.QuirksMode.quirks && tb.inButtonScope("p")) {
446                            tb.processEndTag("p");
447                        }
448                        tb.insert(startTag);
449                        tb.framesetOk(false);
450                        tb.transition(InTable);
451                    } else if (name.equals("input")) {
452                        tb.reconstructFormattingElements();
453                        Element el = tb.insertEmpty(startTag);
454                        if (!el.attr("type").equalsIgnoreCase("hidden"))
455                            tb.framesetOk(false);
456                    } else if (StringUtil.inSorted(name, Constants.InBodyStartMedia)) {
457                        tb.insertEmpty(startTag);
458                    } else if (name.equals("hr")) {
459                        if (tb.inButtonScope("p")) {
460                            tb.processEndTag("p");
461                        }
462                        tb.insertEmpty(startTag);
463                        tb.framesetOk(false);
464                    } else if (name.equals("image")) {
465                        if (tb.getFromStack("svg") == null)
466                            return tb.process(startTag.name("img")); // change <image> to <img>, unless in svg
467                        else
468                            tb.insert(startTag);
469                    } else if (name.equals("isindex")) {
470                        // how much do we care about the early 90s?
471                        tb.error(this);
472                        if (tb.getFormElement() != null)
473                            return false;
474
475                        tb.processStartTag("form");
476                        if (startTag.attributes.hasKey("action")) {
477                            Element form = tb.getFormElement();
478                            form.attr("action", startTag.attributes.get("action"));
479                        }
480                        tb.processStartTag("hr");
481                        tb.processStartTag("label");
482                        // hope you like english.
483                        String prompt = startTag.attributes.hasKey("prompt") ?
484                                startTag.attributes.get("prompt") :
485                                "This is a searchable index. Enter search keywords: ";
486
487                        tb.process(new Token.Character().data(prompt));
488
489                        // input
490                        Attributes inputAttribs = new Attributes();
491                        for (Attribute attr : startTag.attributes) {
492                            if (!StringUtil.inSorted(attr.getKey(), Constants.InBodyStartInputAttribs))
493                                inputAttribs.put(attr);
494                        }
495                        inputAttribs.put("name", "isindex");
496                        tb.processStartTag("input", inputAttribs);
497                        tb.processEndTag("label");
498                        tb.processStartTag("hr");
499                        tb.processEndTag("form");
500                    } else if (name.equals("textarea")) {
501                        tb.insert(startTag);
502                        // todo: If the next token is a U+000A LINE FEED (LF) character token, then ignore that token and move on to the next one. (Newlines at the start of textarea elements are ignored as an authoring convenience.)
503                        tb.tokeniser.transition(TokeniserState.Rcdata);
504                        tb.markInsertionMode();
505                        tb.framesetOk(false);
506                        tb.transition(Text);
507                    } else if (name.equals("xmp")) {
508                        if (tb.inButtonScope("p")) {
509                            tb.processEndTag("p");
510                        }
511                        tb.reconstructFormattingElements();
512                        tb.framesetOk(false);
513                        handleRawtext(startTag, tb);
514                    } else if (name.equals("iframe")) {
515                        tb.framesetOk(false);
516                        handleRawtext(startTag, tb);
517                    } else if (name.equals("noembed")) {
518                        // also handle noscript if script enabled
519                        handleRawtext(startTag, tb);
520                    } else if (name.equals("select")) {
521                        tb.reconstructFormattingElements();
522                        tb.insert(startTag);
523                        tb.framesetOk(false);
524
525                        HtmlTreeBuilderState state = tb.state();
526                        if (state.equals(InTable) || state.equals(InCaption) || state.equals(InTableBody) || state.equals(InRow) || state.equals(InCell))
527                            tb.transition(InSelectInTable);
528                        else
529                            tb.transition(InSelect);
530                    } else if (StringUtil.inSorted(name, Constants.InBodyStartOptions)) {
531                        if (tb.currentElement().nodeName().equals("option"))
532                            tb.processEndTag("option");
533                        tb.reconstructFormattingElements();
534                        tb.insert(startTag);
535                    } else if (StringUtil.inSorted(name, Constants.InBodyStartRuby)) {
536                        if (tb.inScope("ruby")) {
537                            tb.generateImpliedEndTags();
538                            if (!tb.currentElement().nodeName().equals("ruby")) {
539                                tb.error(this);
540                                tb.popStackToBefore("ruby"); // i.e. close up to but not include name
541                            }
542                            tb.insert(startTag);
543                        }
544                    } else if (name.equals("math")) {
545                        tb.reconstructFormattingElements();
546                        // todo: handle A start tag whose tag name is "math" (i.e. foreign, mathml)
547                        tb.insert(startTag);
548                    } else if (name.equals("svg")) {
549                        tb.reconstructFormattingElements();
550                        // todo: handle A start tag whose tag name is "svg" (xlink, svg)
551                        tb.insert(startTag);
552                    } else if (StringUtil.inSorted(name, Constants.InBodyStartDrop)) {
553                        tb.error(this);
554                        return false;
555                    } else {
556                        tb.reconstructFormattingElements();
557                        tb.insert(startTag);
558                    }
559                    break;
560
561                case EndTag:
562                    Token.EndTag endTag = t.asEndTag();
563                    name = endTag.normalName();
564                    if (StringUtil.inSorted(name, Constants.InBodyEndAdoptionFormatters)) {
565                        // Adoption Agency Algorithm.
566                        for (int i = 0; i < 8; i++) {
567                            Element formatEl = tb.getActiveFormattingElement(name);
568                            if (formatEl == null)
569                                return anyOtherEndTag(t, tb);
570                            else if (!tb.onStack(formatEl)) {
571                                tb.error(this);
572                                tb.removeFromActiveFormattingElements(formatEl);
573                                return true;
574                            } else if (!tb.inScope(formatEl.nodeName())) {
575                                tb.error(this);
576                                return false;
577                            } else if (tb.currentElement() != formatEl)
578                                tb.error(this);
579
580                            Element furthestBlock = null;
581                            Element commonAncestor = null;
582                            boolean seenFormattingElement = false;
583                            ArrayList<Element> stack = tb.getStack();
584                            // the spec doesn't limit to < 64, but in degenerate cases (9000+ stack depth) this prevents
585                            // run-aways
586                            final int stackSize = stack.size();
587                            for (int si = 0; si < stackSize && si < 64; si++) {
588                                Element el = stack.get(si);
589                                if (el == formatEl) {
590                                    commonAncestor = stack.get(si - 1);
591                                    seenFormattingElement = true;
592                                } else if (seenFormattingElement && tb.isSpecial(el)) {
593                                    furthestBlock = el;
594                                    break;
595                                }
596                            }
597                            if (furthestBlock == null) {
598                                tb.popStackToClose(formatEl.nodeName());
599                                tb.removeFromActiveFormattingElements(formatEl);
600                                return true;
601                            }
602
603                            // todo: Let a bookmark note the position of the formatting element in the list of active formatting elements relative to the elements on either side of it in the list.
604                            // does that mean: int pos of format el in list?
605                            Element node = furthestBlock;
606                            Element lastNode = furthestBlock;
607                            for (int j = 0; j < 3; j++) {
608                                if (tb.onStack(node))
609                                    node = tb.aboveOnStack(node);
610                                if (!tb.isInActiveFormattingElements(node)) { // note no bookmark check
611                                    tb.removeFromStack(node);
612                                    continue;
613                                } else if (node == formatEl)
614                                    break;
615
616                                Element replacement = new Element(Tag.valueOf(node.nodeName(), ParseSettings.preserveCase), tb.getBaseUri());
617                                // case will follow the original node (so honours ParseSettings)
618                                tb.replaceActiveFormattingElement(node, replacement);
619                                tb.replaceOnStack(node, replacement);
620                                node = replacement;
621
622                                if (lastNode == furthestBlock) {
623                                    // todo: move the aforementioned bookmark to be immediately after the new node in the list of active formatting elements.
624                                    // not getting how this bookmark both straddles the element above, but is inbetween here...
625                                }
626                                if (lastNode.parent() != null)
627                                    lastNode.remove();
628                                node.appendChild(lastNode);
629
630                                lastNode = node;
631                            }
632
633                            if (StringUtil.inSorted(commonAncestor.nodeName(), Constants.InBodyEndTableFosters)) {
634                                if (lastNode.parent() != null)
635                                    lastNode.remove();
636                                tb.insertInFosterParent(lastNode);
637                            } else {
638                                if (lastNode.parent() != null)
639                                    lastNode.remove();
640                                commonAncestor.appendChild(lastNode);
641                            }
642
643                            Element adopter = new Element(formatEl.tag(), tb.getBaseUri());
644                            adopter.attributes().addAll(formatEl.attributes());
645                            Node[] childNodes = furthestBlock.childNodes().toArray(new Node[furthestBlock.childNodeSize()]);
646                            for (Node childNode : childNodes) {
647                                adopter.appendChild(childNode); // append will reparent. thus the clone to avoid concurrent mod.
648                            }
649                            furthestBlock.appendChild(adopter);
650                            tb.removeFromActiveFormattingElements(formatEl);
651                            // todo: insert the new element into the list of active formatting elements at the position of the aforementioned bookmark.
652                            tb.removeFromStack(formatEl);
653                            tb.insertOnStackAfter(furthestBlock, adopter);
654                        }
655                    } else if (StringUtil.inSorted(name, Constants.InBodyEndClosers)) {
656                        if (!tb.inScope(name)) {
657                            // nothing to close
658                            tb.error(this);
659                            return false;
660                        } else {
661                            tb.generateImpliedEndTags();
662                            if (!tb.currentElement().nodeName().equals(name))
663                                tb.error(this);
664                            tb.popStackToClose(name);
665                        }
666                    } else if (name.equals("span")) {
667                        // same as final fall through, but saves short circuit
668                        return anyOtherEndTag(t, tb);
669                    } else if (name.equals("li")) {
670                        if (!tb.inListItemScope(name)) {
671                            tb.error(this);
672                            return false;
673                        } else {
674                            tb.generateImpliedEndTags(name);
675                            if (!tb.currentElement().nodeName().equals(name))
676                                tb.error(this);
677                            tb.popStackToClose(name);
678                        }
679                    } else if (name.equals("body")) {
680                        if (!tb.inScope("body")) {
681                            tb.error(this);
682                            return false;
683                        } else {
684                            // todo: error if stack contains something not dd, dt, li, optgroup, option, p, rp, rt, tbody, td, tfoot, th, thead, tr, body, html
685                            tb.transition(AfterBody);
686                        }
687                    } else if (name.equals("html")) {
688                        boolean notIgnored = tb.processEndTag("body");
689                        if (notIgnored)
690                            return tb.process(endTag);
691                    } else if (name.equals("form")) {
692                        Element currentForm = tb.getFormElement();
693                        tb.setFormElement(null);
694                        if (currentForm == null || !tb.inScope(name)) {
695                            tb.error(this);
696                            return false;
697                        } else {
698                            tb.generateImpliedEndTags();
699                            if (!tb.currentElement().nodeName().equals(name))
700                                tb.error(this);
701                            // remove currentForm from stack. will shift anything under up.
702                            tb.removeFromStack(currentForm);
703                        }
704                    } else if (name.equals("p")) {
705                        if (!tb.inButtonScope(name)) {
706                            tb.error(this);
707                            tb.processStartTag(name); // if no p to close, creates an empty <p></p>
708                            return tb.process(endTag);
709                        } else {
710                            tb.generateImpliedEndTags(name);
711                            if (!tb.currentElement().nodeName().equals(name))
712                                tb.error(this);
713                            tb.popStackToClose(name);
714                        }
715                    } else if (StringUtil.inSorted(name, Constants.DdDt)) {
716                        if (!tb.inScope(name)) {
717                            tb.error(this);
718                            return false;
719                        } else {
720                            tb.generateImpliedEndTags(name);
721                            if (!tb.currentElement().nodeName().equals(name))
722                                tb.error(this);
723                            tb.popStackToClose(name);
724                        }
725                    } else if (StringUtil.inSorted(name, Constants.Headings)) {
726                        if (!tb.inScope(Constants.Headings)) {
727                            tb.error(this);
728                            return false;
729                        } else {
730                            tb.generateImpliedEndTags(name);
731                            if (!tb.currentElement().nodeName().equals(name))
732                                tb.error(this);
733                            tb.popStackToClose(Constants.Headings);
734                        }
735                    } else if (name.equals("sarcasm")) {
736                        // *sigh*
737                        return anyOtherEndTag(t, tb);
738                    } else if (StringUtil.inSorted(name, Constants.InBodyStartApplets)) {
739                        if (!tb.inScope("name")) {
740                            if (!tb.inScope(name)) {
741                                tb.error(this);
742                                return false;
743                            }
744                            tb.generateImpliedEndTags();
745                            if (!tb.currentElement().nodeName().equals(name))
746                                tb.error(this);
747                            tb.popStackToClose(name);
748                            tb.clearFormattingElementsToLastMarker();
749                        }
750                    } else if (name.equals("br")) {
751                        tb.error(this);
752                        tb.processStartTag("br");
753                        return false;
754                    } else {
755                        return anyOtherEndTag(t, tb);
756                    }
757
758                    break;
759                case EOF:
760                    // todo: error if stack contains something not dd, dt, li, p, tbody, td, tfoot, th, thead, tr, body, html
761                    // stop parsing
762                    break;
763            }
764            return true;
765        }
766
767        boolean anyOtherEndTag(Token t, HtmlTreeBuilder tb) {
768            String name = tb.settings.normalizeTag(t.asEndTag().name()); // matches with case sensitivity if enabled
769            ArrayList<Element> stack = tb.getStack();
770            for (int pos = stack.size() -1; pos >= 0; pos--) {
771                Element node = stack.get(pos);
772                if (node.nodeName().equals(name)) {
773                    tb.generateImpliedEndTags(name);
774                    if (!name.equals(tb.currentElement().nodeName()))
775                        tb.error(this);
776                    tb.popStackToClose(name);
777                    break;
778                } else {
779                    if (tb.isSpecial(node)) {
780                        tb.error(this);
781                        return false;
782                    }
783                }
784            }
785            return true;
786        }
787    },
788    Text {
789        // in script, style etc. normally treated as data tags
790        boolean process(Token t, HtmlTreeBuilder tb) {
791            if (t.isCharacter()) {
792                tb.insert(t.asCharacter());
793            } else if (t.isEOF()) {
794                tb.error(this);
795                // if current node is script: already started
796                tb.pop();
797                tb.transition(tb.originalState());
798                return tb.process(t);
799            } else if (t.isEndTag()) {
800                // if: An end tag whose tag name is "script" -- scripting nesting level, if evaluating scripts
801                tb.pop();
802                tb.transition(tb.originalState());
803            }
804            return true;
805        }
806    },
807    InTable {
808        boolean process(Token t, HtmlTreeBuilder tb) {
809            if (t.isCharacter()) {
810                tb.newPendingTableCharacters();
811                tb.markInsertionMode();
812                tb.transition(InTableText);
813                return tb.process(t);
814            } else if (t.isComment()) {
815                tb.insert(t.asComment());
816                return true;
817            } else if (t.isDoctype()) {
818                tb.error(this);
819                return false;
820            } else if (t.isStartTag()) {
821                Token.StartTag startTag = t.asStartTag();
822                String name = startTag.normalName();
823                if (name.equals("caption")) {
824                    tb.clearStackToTableContext();
825                    tb.insertMarkerToFormattingElements();
826                    tb.insert(startTag);
827                    tb.transition(InCaption);
828                } else if (name.equals("colgroup")) {
829                    tb.clearStackToTableContext();
830                    tb.insert(startTag);
831                    tb.transition(InColumnGroup);
832                } else if (name.equals("col")) {
833                    tb.processStartTag("colgroup");
834                    return tb.process(t);
835                } else if (StringUtil.in(name, "tbody", "tfoot", "thead")) {
836                    tb.clearStackToTableContext();
837                    tb.insert(startTag);
838                    tb.transition(InTableBody);
839                } else if (StringUtil.in(name, "td", "th", "tr")) {
840                    tb.processStartTag("tbody");
841                    return tb.process(t);
842                } else if (name.equals("table")) {
843                    tb.error(this);
844                    boolean processed = tb.processEndTag("table");
845                    if (processed) // only ignored if in fragment
846                        return tb.process(t);
847                } else if (StringUtil.in(name, "style", "script")) {
848                    return tb.process(t, InHead);
849                } else if (name.equals("input")) {
850                    if (!startTag.attributes.get("type").equalsIgnoreCase("hidden")) {
851                        return anythingElse(t, tb);
852                    } else {
853                        tb.insertEmpty(startTag);
854                    }
855                } else if (name.equals("form")) {
856                    tb.error(this);
857                    if (tb.getFormElement() != null)
858                        return false;
859                    else {
860                        tb.insertForm(startTag, false);
861                    }
862                } else {
863                    return anythingElse(t, tb);
864                }
865                return true; // todo: check if should return processed http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#parsing-main-intable
866            } else if (t.isEndTag()) {
867                Token.EndTag endTag = t.asEndTag();
868                String name = endTag.normalName();
869
870                if (name.equals("table")) {
871                    if (!tb.inTableScope(name)) {
872                        tb.error(this);
873                        return false;
874                    } else {
875                        tb.popStackToClose("table");
876                    }
877                    tb.resetInsertionMode();
878                } else if (StringUtil.in(name,
879                        "body", "caption", "col", "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr")) {
880                    tb.error(this);
881                    return false;
882                } else {
883                    return anythingElse(t, tb);
884                }
885                return true; // todo: as above todo
886            } else if (t.isEOF()) {
887                if (tb.currentElement().nodeName().equals("html"))
888                    tb.error(this);
889                return true; // stops parsing
890            }
891            return anythingElse(t, tb);
892        }
893
894        boolean anythingElse(Token t, HtmlTreeBuilder tb) {
895            tb.error(this);
896            boolean processed;
897            if (StringUtil.in(tb.currentElement().nodeName(), "table", "tbody", "tfoot", "thead", "tr")) {
898                tb.setFosterInserts(true);
899                processed = tb.process(t, InBody);
900                tb.setFosterInserts(false);
901            } else {
902                processed = tb.process(t, InBody);
903            }
904            return processed;
905        }
906    },
907    InTableText {
908        boolean process(Token t, HtmlTreeBuilder tb) {
909            switch (t.type) {
910                case Character:
911                    Token.Character c = t.asCharacter();
912                    if (c.getData().equals(nullString)) {
913                        tb.error(this);
914                        return false;
915                    } else {
916                        tb.getPendingTableCharacters().add(c.getData());
917                    }
918                    break;
919                default:
920                    // todo - don't really like the way these table character data lists are built
921                    if (tb.getPendingTableCharacters().size() > 0) {
922                        for (String character : tb.getPendingTableCharacters()) {
923                            if (!isWhitespace(character)) {
924                                // InTable anything else section:
925                                tb.error(this);
926                                if (StringUtil.in(tb.currentElement().nodeName(), "table", "tbody", "tfoot", "thead", "tr")) {
927                                    tb.setFosterInserts(true);
928                                    tb.process(new Token.Character().data(character), InBody);
929                                    tb.setFosterInserts(false);
930                                } else {
931                                    tb.process(new Token.Character().data(character), InBody);
932                                }
933                            } else
934                                tb.insert(new Token.Character().data(character));
935                        }
936                        tb.newPendingTableCharacters();
937                    }
938                    tb.transition(tb.originalState());
939                    return tb.process(t);
940            }
941            return true;
942        }
943    },
944    InCaption {
945        boolean process(Token t, HtmlTreeBuilder tb) {
946            if (t.isEndTag() && t.asEndTag().normalName().equals("caption")) {
947                Token.EndTag endTag = t.asEndTag();
948                String name = endTag.normalName();
949                if (!tb.inTableScope(name)) {
950                    tb.error(this);
951                    return false;
952                } else {
953                    tb.generateImpliedEndTags();
954                    if (!tb.currentElement().nodeName().equals("caption"))
955                        tb.error(this);
956                    tb.popStackToClose("caption");
957                    tb.clearFormattingElementsToLastMarker();
958                    tb.transition(InTable);
959                }
960            } else if ((
961                    t.isStartTag() && StringUtil.in(t.asStartTag().normalName(),
962                            "caption", "col", "colgroup", "tbody", "td", "tfoot", "th", "thead", "tr") ||
963                            t.isEndTag() && t.asEndTag().normalName().equals("table"))
964                    ) {
965                tb.error(this);
966                boolean processed = tb.processEndTag("caption");
967                if (processed)
968                    return tb.process(t);
969            } else if (t.isEndTag() && StringUtil.in(t.asEndTag().normalName(),
970                    "body", "col", "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr")) {
971                tb.error(this);
972                return false;
973            } else {
974                return tb.process(t, InBody);
975            }
976            return true;
977        }
978    },
979    InColumnGroup {
980        boolean process(Token t, HtmlTreeBuilder tb) {
981            if (isWhitespace(t)) {
982                tb.insert(t.asCharacter());
983                return true;
984            }
985            switch (t.type) {
986                case Comment:
987                    tb.insert(t.asComment());
988                    break;
989                case Doctype:
990                    tb.error(this);
991                    break;
992                case StartTag:
993                    Token.StartTag startTag = t.asStartTag();
994                    switch (startTag.normalName()) {
995                        case "html":
996                            return tb.process(t, InBody);
997                        case "col":
998                            tb.insertEmpty(startTag);
999                            break;
1000                        default:
1001                            return anythingElse(t, tb);
1002                    }
1003                    break;
1004                case EndTag:
1005                    Token.EndTag endTag = t.asEndTag();
1006                    if (endTag.normalName.equals("colgroup")) {
1007                        if (tb.currentElement().nodeName().equals("html")) { // frag case
1008                            tb.error(this);
1009                            return false;
1010                        } else {
1011                            tb.pop();
1012                            tb.transition(InTable);
1013                        }
1014                    } else
1015                        return anythingElse(t, tb);
1016                    break;
1017                case EOF:
1018                    if (tb.currentElement().nodeName().equals("html"))
1019                        return true; // stop parsing; frag case
1020                    else
1021                        return anythingElse(t, tb);
1022                default:
1023                    return anythingElse(t, tb);
1024            }
1025            return true;
1026        }
1027
1028        private boolean anythingElse(Token t, TreeBuilder tb) {
1029            boolean processed = tb.processEndTag("colgroup");
1030            if (processed) // only ignored in frag case
1031                return tb.process(t);
1032            return true;
1033        }
1034    },
1035    InTableBody {
1036        boolean process(Token t, HtmlTreeBuilder tb) {
1037            switch (t.type) {
1038                case StartTag:
1039                    Token.StartTag startTag = t.asStartTag();
1040                    String name = startTag.normalName();
1041                    if (name.equals("template")) {
1042                        tb.insert(startTag);
1043                    } else if (name.equals("tr")) {
1044                        tb.clearStackToTableBodyContext();
1045                        tb.insert(startTag);
1046                        tb.transition(InRow);
1047                    } else if (StringUtil.in(name, "th", "td")) {
1048                        tb.error(this);
1049                        tb.processStartTag("tr");
1050                        return tb.process(startTag);
1051                    } else if (StringUtil.in(name, "caption", "col", "colgroup", "tbody", "tfoot", "thead")) {
1052                        return exitTableBody(t, tb);
1053                    } else
1054                        return anythingElse(t, tb);
1055                    break;
1056                case EndTag:
1057                    Token.EndTag endTag = t.asEndTag();
1058                    name = endTag.normalName();
1059                    if (StringUtil.in(name, "tbody", "tfoot", "thead")) {
1060                        if (!tb.inTableScope(name)) {
1061                            tb.error(this);
1062                            return false;
1063                        } else {
1064                            tb.clearStackToTableBodyContext();
1065                            tb.pop();
1066                            tb.transition(InTable);
1067                        }
1068                    } else if (name.equals("table")) {
1069                        return exitTableBody(t, tb);
1070                    } else if (StringUtil.in(name, "body", "caption", "col", "colgroup", "html", "td", "th", "tr")) {
1071                        tb.error(this);
1072                        return false;
1073                    } else
1074                        return anythingElse(t, tb);
1075                    break;
1076                default:
1077                    return anythingElse(t, tb);
1078            }
1079            return true;
1080        }
1081
1082        private boolean exitTableBody(Token t, HtmlTreeBuilder tb) {
1083            if (!(tb.inTableScope("tbody") || tb.inTableScope("thead") || tb.inScope("tfoot"))) {
1084                // frag case
1085                tb.error(this);
1086                return false;
1087            }
1088            tb.clearStackToTableBodyContext();
1089            tb.processEndTag(tb.currentElement().nodeName()); // tbody, tfoot, thead
1090            return tb.process(t);
1091        }
1092
1093        private boolean anythingElse(Token t, HtmlTreeBuilder tb) {
1094            return tb.process(t, InTable);
1095        }
1096    },
1097    InRow {
1098        boolean process(Token t, HtmlTreeBuilder tb) {
1099            if (t.isStartTag()) {
1100                Token.StartTag startTag = t.asStartTag();
1101                String name = startTag.normalName();
1102
1103                if (name.equals("template")) {
1104                    tb.insert(startTag);
1105                } else if (StringUtil.in(name, "th", "td")) {
1106                    tb.clearStackToTableRowContext();
1107                    tb.insert(startTag);
1108                    tb.transition(InCell);
1109                    tb.insertMarkerToFormattingElements();
1110                } else if (StringUtil.in(name, "caption", "col", "colgroup", "tbody", "tfoot", "thead", "tr")) {
1111                    return handleMissingTr(t, tb);
1112                } else {
1113                    return anythingElse(t, tb);
1114                }
1115            } else if (t.isEndTag()) {
1116                Token.EndTag endTag = t.asEndTag();
1117                String name = endTag.normalName();
1118
1119                if (name.equals("tr")) {
1120                    if (!tb.inTableScope(name)) {
1121                        tb.error(this); // frag
1122                        return false;
1123                    }
1124                    tb.clearStackToTableRowContext();
1125                    tb.pop(); // tr
1126                    tb.transition(InTableBody);
1127                } else if (name.equals("table")) {
1128                    return handleMissingTr(t, tb);
1129                } else if (StringUtil.in(name, "tbody", "tfoot", "thead")) {
1130                    if (!tb.inTableScope(name)) {
1131                        tb.error(this);
1132                        return false;
1133                    }
1134                    tb.processEndTag("tr");
1135                    return tb.process(t);
1136                } else if (StringUtil.in(name, "body", "caption", "col", "colgroup", "html", "td", "th")) {
1137                    tb.error(this);
1138                    return false;
1139                } else {
1140                    return anythingElse(t, tb);
1141                }
1142            } else {
1143                return anythingElse(t, tb);
1144            }
1145            return true;
1146        }
1147
1148        private boolean anythingElse(Token t, HtmlTreeBuilder tb) {
1149            return tb.process(t, InTable);
1150        }
1151
1152        private boolean handleMissingTr(Token t, TreeBuilder tb) {
1153            boolean processed = tb.processEndTag("tr");
1154            if (processed)
1155                return tb.process(t);
1156            else
1157                return false;
1158        }
1159    },
1160    InCell {
1161        boolean process(Token t, HtmlTreeBuilder tb) {
1162            if (t.isEndTag()) {
1163                Token.EndTag endTag = t.asEndTag();
1164                String name = endTag.normalName();
1165
1166                if (StringUtil.in(name, "td", "th")) {
1167                    if (!tb.inTableScope(name)) {
1168                        tb.error(this);
1169                        tb.transition(InRow); // might not be in scope if empty: <td /> and processing fake end tag
1170                        return false;
1171                    }
1172                    tb.generateImpliedEndTags();
1173                    if (!tb.currentElement().nodeName().equals(name))
1174                        tb.error(this);
1175                    tb.popStackToClose(name);
1176                    tb.clearFormattingElementsToLastMarker();
1177                    tb.transition(InRow);
1178                } else if (StringUtil.in(name, "body", "caption", "col", "colgroup", "html")) {
1179                    tb.error(this);
1180                    return false;
1181                } else if (StringUtil.in(name, "table", "tbody", "tfoot", "thead", "tr")) {
1182                    if (!tb.inTableScope(name)) {
1183                        tb.error(this);
1184                        return false;
1185                    }
1186                    closeCell(tb);
1187                    return tb.process(t);
1188                } else {
1189                    return anythingElse(t, tb);
1190                }
1191            } else if (t.isStartTag() &&
1192                    StringUtil.in(t.asStartTag().normalName(),
1193                            "caption", "col", "colgroup", "tbody", "td", "tfoot", "th", "thead", "tr")) {
1194                if (!(tb.inTableScope("td") || tb.inTableScope("th"))) {
1195                    tb.error(this);
1196                    return false;
1197                }
1198                closeCell(tb);
1199                return tb.process(t);
1200            } else {
1201                return anythingElse(t, tb);
1202            }
1203            return true;
1204        }
1205
1206        private boolean anythingElse(Token t, HtmlTreeBuilder tb) {
1207            return tb.process(t, InBody);
1208        }
1209
1210        private void closeCell(HtmlTreeBuilder tb) {
1211            if (tb.inTableScope("td"))
1212                tb.processEndTag("td");
1213            else
1214                tb.processEndTag("th"); // only here if th or td in scope
1215        }
1216    },
1217    InSelect {
1218        boolean process(Token t, HtmlTreeBuilder tb) {
1219            switch (t.type) {
1220                case Character:
1221                    Token.Character c = t.asCharacter();
1222                    if (c.getData().equals(nullString)) {
1223                        tb.error(this);
1224                        return false;
1225                    } else {
1226                        tb.insert(c);
1227                    }
1228                    break;
1229                case Comment:
1230                    tb.insert(t.asComment());
1231                    break;
1232                case Doctype:
1233                    tb.error(this);
1234                    return false;
1235                case StartTag:
1236                    Token.StartTag start = t.asStartTag();
1237                    String name = start.normalName();
1238                    if (name.equals("html"))
1239                        return tb.process(start, InBody);
1240                    else if (name.equals("option")) {
1241                        if (tb.currentElement().nodeName().equals("option"))
1242                            tb.processEndTag("option");
1243                        tb.insert(start);
1244                    } else if (name.equals("optgroup")) {
1245                        if (tb.currentElement().nodeName().equals("option"))
1246                            tb.processEndTag("option");
1247                        else if (tb.currentElement().nodeName().equals("optgroup"))
1248                            tb.processEndTag("optgroup");
1249                        tb.insert(start);
1250                    } else if (name.equals("select")) {
1251                        tb.error(this);
1252                        return tb.processEndTag("select");
1253                    } else if (StringUtil.in(name, "input", "keygen", "textarea")) {
1254                        tb.error(this);
1255                        if (!tb.inSelectScope("select"))
1256                            return false; // frag
1257                        tb.processEndTag("select");
1258                        return tb.process(start);
1259                    } else if (name.equals("script")) {
1260                        return tb.process(t, InHead);
1261                    } else {
1262                        return anythingElse(t, tb);
1263                    }
1264                    break;
1265                case EndTag:
1266                    Token.EndTag end = t.asEndTag();
1267                    name = end.normalName();
1268                    switch (name) {
1269                        case "optgroup":
1270                            if (tb.currentElement().nodeName().equals("option") && tb.aboveOnStack(tb.currentElement()) != null && tb.aboveOnStack(tb.currentElement()).nodeName().equals("optgroup"))
1271                                tb.processEndTag("option");
1272                            if (tb.currentElement().nodeName().equals("optgroup"))
1273                                tb.pop();
1274                            else
1275                                tb.error(this);
1276                            break;
1277                        case "option":
1278                            if (tb.currentElement().nodeName().equals("option"))
1279                                tb.pop();
1280                            else
1281                                tb.error(this);
1282                            break;
1283                        case "select":
1284                            if (!tb.inSelectScope(name)) {
1285                                tb.error(this);
1286                                return false;
1287                            } else {
1288                                tb.popStackToClose(name);
1289                                tb.resetInsertionMode();
1290                            }
1291                            break;
1292                        default:
1293                            return anythingElse(t, tb);
1294                    }
1295                    break;
1296                case EOF:
1297                    if (!tb.currentElement().nodeName().equals("html"))
1298                        tb.error(this);
1299                    break;
1300                default:
1301                    return anythingElse(t, tb);
1302            }
1303            return true;
1304        }
1305
1306        private boolean anythingElse(Token t, HtmlTreeBuilder tb) {
1307            tb.error(this);
1308            return false;
1309        }
1310    },
1311    InSelectInTable {
1312        boolean process(Token t, HtmlTreeBuilder tb) {
1313            if (t.isStartTag() && StringUtil.in(t.asStartTag().normalName(), "caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th")) {
1314                tb.error(this);
1315                tb.processEndTag("select");
1316                return tb.process(t);
1317            } else if (t.isEndTag() && StringUtil.in(t.asEndTag().normalName(), "caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th")) {
1318                tb.error(this);
1319                if (tb.inTableScope(t.asEndTag().normalName())) {
1320                    tb.processEndTag("select");
1321                    return (tb.process(t));
1322                } else
1323                    return false;
1324            } else {
1325                return tb.process(t, InSelect);
1326            }
1327        }
1328    },
1329    AfterBody {
1330        boolean process(Token t, HtmlTreeBuilder tb) {
1331            if (isWhitespace(t)) {
1332                return tb.process(t, InBody);
1333            } else if (t.isComment()) {
1334                tb.insert(t.asComment()); // into html node
1335            } else if (t.isDoctype()) {
1336                tb.error(this);
1337                return false;
1338            } else if (t.isStartTag() && t.asStartTag().normalName().equals("html")) {
1339                return tb.process(t, InBody);
1340            } else if (t.isEndTag() && t.asEndTag().normalName().equals("html")) {
1341                if (tb.isFragmentParsing()) {
1342                    tb.error(this);
1343                    return false;
1344                } else {
1345                    tb.transition(AfterAfterBody);
1346                }
1347            } else if (t.isEOF()) {
1348                // chillax! we're done
1349            } else {
1350                tb.error(this);
1351                tb.transition(InBody);
1352                return tb.process(t);
1353            }
1354            return true;
1355        }
1356    },
1357    InFrameset {
1358        boolean process(Token t, HtmlTreeBuilder tb) {
1359            if (isWhitespace(t)) {
1360                tb.insert(t.asCharacter());
1361            } else if (t.isComment()) {
1362                tb.insert(t.asComment());
1363            } else if (t.isDoctype()) {
1364                tb.error(this);
1365                return false;
1366            } else if (t.isStartTag()) {
1367                Token.StartTag start = t.asStartTag();
1368                switch (start.normalName()) {
1369                    case "html":
1370                        return tb.process(start, InBody);
1371                    case "frameset":
1372                        tb.insert(start);
1373                        break;
1374                    case "frame":
1375                        tb.insertEmpty(start);
1376                        break;
1377                    case "noframes":
1378                        return tb.process(start, InHead);
1379                    default:
1380                        tb.error(this);
1381                        return false;
1382                }
1383            } else if (t.isEndTag() && t.asEndTag().normalName().equals("frameset")) {
1384                if (tb.currentElement().nodeName().equals("html")) { // frag
1385                    tb.error(this);
1386                    return false;
1387                } else {
1388                    tb.pop();
1389                    if (!tb.isFragmentParsing() && !tb.currentElement().nodeName().equals("frameset")) {
1390                        tb.transition(AfterFrameset);
1391                    }
1392                }
1393            } else if (t.isEOF()) {
1394                if (!tb.currentElement().nodeName().equals("html")) {
1395                    tb.error(this);
1396                    return true;
1397                }
1398            } else {
1399                tb.error(this);
1400                return false;
1401            }
1402            return true;
1403        }
1404    },
1405    AfterFrameset {
1406        boolean process(Token t, HtmlTreeBuilder tb) {
1407            if (isWhitespace(t)) {
1408                tb.insert(t.asCharacter());
1409            } else if (t.isComment()) {
1410                tb.insert(t.asComment());
1411            } else if (t.isDoctype()) {
1412                tb.error(this);
1413                return false;
1414            } else if (t.isStartTag() && t.asStartTag().normalName().equals("html")) {
1415                return tb.process(t, InBody);
1416            } else if (t.isEndTag() && t.asEndTag().normalName().equals("html")) {
1417                tb.transition(AfterAfterFrameset);
1418            } else if (t.isStartTag() && t.asStartTag().normalName().equals("noframes")) {
1419                return tb.process(t, InHead);
1420            } else if (t.isEOF()) {
1421                // cool your heels, we're complete
1422            } else {
1423                tb.error(this);
1424                return false;
1425            }
1426            return true;
1427        }
1428    },
1429    AfterAfterBody {
1430        boolean process(Token t, HtmlTreeBuilder tb) {
1431            if (t.isComment()) {
1432                tb.insert(t.asComment());
1433            } else if (t.isDoctype() || isWhitespace(t) || (t.isStartTag() && t.asStartTag().normalName().equals("html"))) {
1434                return tb.process(t, InBody);
1435            } else if (t.isEOF()) {
1436                // nice work chuck
1437            } else {
1438                tb.error(this);
1439                tb.transition(InBody);
1440                return tb.process(t);
1441            }
1442            return true;
1443        }
1444    },
1445    AfterAfterFrameset {
1446        boolean process(Token t, HtmlTreeBuilder tb) {
1447            if (t.isComment()) {
1448                tb.insert(t.asComment());
1449            } else if (t.isDoctype() || isWhitespace(t) || (t.isStartTag() && t.asStartTag().normalName().equals("html"))) {
1450                return tb.process(t, InBody);
1451            } else if (t.isEOF()) {
1452                // nice work chuck
1453            } else if (t.isStartTag() && t.asStartTag().normalName().equals("noframes")) {
1454                return tb.process(t, InHead);
1455            } else {
1456                tb.error(this);
1457                return false;
1458            }
1459            return true;
1460        }
1461    },
1462    ForeignContent {
1463        boolean process(Token t, HtmlTreeBuilder tb) {
1464            return true;
1465            // todo: implement. Also; how do we get here?
1466        }
1467    };
1468
1469    private static String nullString = String.valueOf('\u0000');
1470
1471    abstract boolean process(Token t, HtmlTreeBuilder tb);
1472
1473    private static boolean isWhitespace(Token t) {
1474        if (t.isCharacter()) {
1475            String data = t.asCharacter().getData();
1476            return isWhitespace(data);
1477        }
1478        return false;
1479    }
1480
1481    private static boolean isWhitespace(String data) {
1482        // todo: this checks more than spec - "\t", "\n", "\f", "\r", " "
1483        for (int i = 0; i < data.length(); i++) {
1484            char c = data.charAt(i);
1485            if (!StringUtil.isWhitespace(c))
1486                return false;
1487        }
1488        return true;
1489    }
1490
1491    private static void handleRcData(Token.StartTag startTag, HtmlTreeBuilder tb) {
1492        tb.tokeniser.transition(TokeniserState.Rcdata);
1493        tb.markInsertionMode();
1494        tb.transition(Text);
1495        tb.insert(startTag);
1496    }
1497
1498    private static void handleRawtext(Token.StartTag startTag, HtmlTreeBuilder tb) {
1499        tb.tokeniser.transition(TokeniserState.Rawtext);
1500        tb.markInsertionMode();
1501        tb.transition(Text);
1502        tb.insert(startTag);
1503    }
1504
1505    // lists of tags to search through. A little harder to read here, but causes less GC than dynamic varargs.
1506    // was contributing around 10% of parse GC load.
1507    // must make sure these are sorted, as used in findSorted. MUST update HtmlTreebuilderStateTest if more arrays added.
1508    static final class Constants {
1509        static final String[] InBodyStartToHead = new String[]{"base", "basefont", "bgsound", "command", "link", "meta", "noframes", "script", "style", "title"};
1510        static final String[] InBodyStartPClosers = new String[]{"address", "article", "aside", "blockquote", "center", "details", "dir", "div", "dl",
1511            "fieldset", "figcaption", "figure", "footer", "header", "hgroup", "menu", "nav", "ol",
1512            "p", "section", "summary", "ul"};
1513        static final String[] Headings = new String[]{"h1", "h2", "h3", "h4", "h5", "h6"};
1514        static final String[] InBodyStartPreListing = new String[]{"listing", "pre"};
1515        static final String[] InBodyStartLiBreakers = new String[]{"address", "div", "p"};
1516        static final String[] DdDt = new String[]{"dd", "dt"};
1517        static final String[] Formatters = new String[]{"b", "big", "code", "em", "font", "i", "s", "small", "strike", "strong", "tt", "u"};
1518        static final String[] InBodyStartApplets = new String[]{"applet", "marquee", "object"};
1519        static final String[] InBodyStartEmptyFormatters = new String[]{"area", "br", "embed", "img", "keygen", "wbr"};
1520        static final String[] InBodyStartMedia = new String[]{"param", "source", "track"};
1521        static final String[] InBodyStartInputAttribs = new String[]{"action", "name", "prompt"};
1522        static final String[] InBodyStartOptions = new String[]{"optgroup", "option"};
1523        static final String[] InBodyStartRuby = new String[]{"rp", "rt"};
1524        static final String[] InBodyStartDrop = new String[]{"caption", "col", "colgroup", "frame", "head", "tbody", "td", "tfoot", "th", "thead", "tr"};
1525        static final String[] InBodyEndClosers = new String[]{"address", "article", "aside", "blockquote", "button", "center", "details", "dir", "div",
1526            "dl", "fieldset", "figcaption", "figure", "footer", "header", "hgroup", "listing", "menu",
1527            "nav", "ol", "pre", "section", "summary", "ul"};
1528        static final String[] InBodyEndAdoptionFormatters = new String[]{"a", "b", "big", "code", "em", "font", "i", "nobr", "s", "small", "strike", "strong", "tt", "u"};
1529        static final String[] InBodyEndTableFosters = new String[]{"table", "tbody", "tfoot", "thead", "tr"};
1530    }
1531}