001package org.jsoup.parser;
002
003import org.jsoup.helper.Validate;
004import org.jsoup.nodes.Attributes;
005import org.jsoup.nodes.Document;
006import org.jsoup.nodes.Element;
007
008import java.io.Reader;
009import java.util.ArrayList;
010
011/**
012 * @author Jonathan Hedley
013 */
014abstract class TreeBuilder {
015    CharacterReader reader;
016    Tokeniser tokeniser;
017    protected Document doc; // current doc we are building into
018    protected ArrayList<Element> stack; // the stack of open elements
019    protected String baseUri; // current base uri, for creating new elements
020    protected Token currentToken; // currentToken is used only for error tracking.
021    protected ParseErrorList errors; // null when not tracking errors
022    protected ParseSettings settings;
023
024    private Token.StartTag start = new Token.StartTag(); // start tag to process
025    private Token.EndTag end  = new Token.EndTag();
026
027    abstract ParseSettings defaultSettings();
028
029    protected void initialiseParse(Reader input, String baseUri, ParseErrorList errors, ParseSettings settings) {
030        Validate.notNull(input, "String input must not be null");
031        Validate.notNull(baseUri, "BaseURI must not be null");
032
033        doc = new Document(baseUri);
034        this.settings = settings;
035        reader = new CharacterReader(input);
036        this.errors = errors;
037        currentToken = null;
038        tokeniser = new Tokeniser(reader, errors);
039        stack = new ArrayList<>(32);
040        this.baseUri = baseUri;
041    }
042
043    Document parse(Reader input, String baseUri, ParseErrorList errors, ParseSettings settings) {
044        initialiseParse(input, baseUri, errors, settings);
045        runParser();
046        return doc;
047    }
048
049    protected void runParser() {
050        while (true) {
051            Token token = tokeniser.read();
052            process(token);
053            token.reset();
054
055            if (token.type == Token.TokenType.EOF)
056                break;
057        }
058    }
059
060    protected abstract boolean process(Token token);
061
062    protected boolean processStartTag(String name) {
063        if (currentToken == start) { // don't recycle an in-use token
064            return process(new Token.StartTag().name(name));
065        }
066        return process(start.reset().name(name));
067    }
068
069    public boolean processStartTag(String name, Attributes attrs) {
070        if (currentToken == start) { // don't recycle an in-use token
071            return process(new Token.StartTag().nameAttr(name, attrs));
072        }
073        start.reset();
074        start.nameAttr(name, attrs);
075        return process(start);
076    }
077
078    protected boolean processEndTag(String name) {
079        if (currentToken == end) { // don't recycle an in-use token
080            return process(new Token.EndTag().name(name));
081        }
082        return process(end.reset().name(name));
083    }
084
085
086    protected Element currentElement() {
087        int size = stack.size();
088        return size > 0 ? stack.get(size-1) : null;
089    }
090}