package org.jsoup.parser;

import org.jsoup.helper.Validate;
import org.jsoup.nodes.Attributes;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;

import java.io.Reader;
import java.util.ArrayList;

/**
 * Base class for tree builders. Holds the state shared by a parse run (character reader, tokeniser,
 * the document being built, and the stack of open elements) and drives the token loop, handing each
 * token to {@link #process(Token)}, which concrete builders implement.
 *
 * @author Jonathan Hedley
 */
abstract class TreeBuilder {
    CharacterReader reader;
    Tokeniser tokeniser;
    protected Document doc; // current doc we are building into
    protected ArrayList<Element> stack; // the stack of open elements
    protected String baseUri; // current base uri, for creating new elements
    protected Token currentToken; // currentToken is used only for error tracking.
    protected ParseErrorList errors; // null when not tracking errors
    protected ParseSettings settings;

    // Recyclable tokens used by the processStartTag / processEndTag helpers. They are compared by
    // identity against currentToken, so the references must never change.
    private final Token.StartTag start = new Token.StartTag(); // start tag to process
    private final Token.EndTag end = new Token.EndTag();

    /**
     * Supplies the parse settings this builder uses by default.
     *
     * @return the default settings for the concrete builder
     */
    abstract ParseSettings defaultSettings();

    /**
     * Prepares this builder for a new parse run: creates a fresh document, character reader,
     * tokeniser and element stack, and clears {@code currentToken}.
     *
     * @param input    the reader supplying the markup; must not be null
     * @param baseUri  the base URI for the new document; must not be null
     * @param errors   the list that collects parse errors, or null when errors are not tracked
     * @param settings the settings to parse with
     */
    protected void initialiseParse(Reader input, String baseUri, ParseErrorList errors, ParseSettings settings) {
        Validate.notNull(input, "Input reader must not be null");
        Validate.notNull(baseUri, "BaseURI must not be null");

        doc = new Document(baseUri);
        this.settings = settings;
        reader = new CharacterReader(input);
        this.errors = errors;
        currentToken = null;
        tokeniser = new Tokeniser(reader, errors);
        stack = new ArrayList<>(32);
        this.baseUri = baseUri;
    }

    /**
     * Initialises this builder and parses the whole input into a document.
     *
     * @param input    the reader supplying the markup; must not be null
     * @param baseUri  the base URI for the new document; must not be null
     * @param errors   the list that collects parse errors, or null when errors are not tracked
     * @param settings the settings to parse with
     * @return the document that was built
     */
    Document parse(Reader input, String baseUri, ParseErrorList errors, ParseSettings settings) {
        initialiseParse(input, baseUri, errors, settings);
        runParser();
        return doc;
    }

    /**
     * Reads tokens from the tokeniser and passes each one to {@link #process(Token)}, stopping once
     * the EOF token has been processed. Every token is reset after it has been processed.
     */
    protected void runParser() {
        while (true) {
            Token token = tokeniser.read();
            process(token);

            // Decide on termination before the token is reset, so the loop exit does not depend
            // on reset() leaving the token's type intact.
            boolean reachedEof = token.type == Token.TokenType.EOF;
            token.reset();

            if (reachedEof) {
                break;
            }
        }
    }

    /**
     * Handles a single token.
     *
     * @param token the token to handle
     * @return a builder-defined result flag (see the concrete implementation)
     */
    protected abstract boolean process(Token token);

    /**
     * Processes a start tag with the given name. The builder's recyclable start token is reused
     * unless it is the very token held in {@code currentToken}, in which case a new token is created.
     *
     * @param name the tag name
     * @return the result of {@link #process(Token)} for the start tag
     */
    protected boolean processStartTag(String name) {
        if (currentToken == start) { // don't recycle an in-use token
            return process(new Token.StartTag().name(name));
        }
        return process(start.reset().name(name));
    }

    /**
     * Processes a start tag with the given name and attributes. The builder's recyclable start token
     * is reused unless it is the very token held in {@code currentToken}, in which case a new token
     * is created.
     *
     * @param name  the tag name
     * @param attrs the attributes for the tag
     * @return the result of {@link #process(Token)} for the start tag
     */
    public boolean processStartTag(String name, Attributes attrs) {
        if (currentToken == start) { // don't recycle an in-use token
            return process(new Token.StartTag().nameAttr(name, attrs));
        }
        start.reset();
        start.nameAttr(name, attrs);
        return process(start);
    }

    /**
     * Processes an end tag with the given name. The builder's recyclable end token is reused unless
     * it is the very token held in {@code currentToken}, in which case a new token is created.
     *
     * @param name the tag name
     * @return the result of {@link #process(Token)} for the end tag
     */
    protected boolean processEndTag(String name) {
        if (currentToken == end) { // don't recycle an in-use token
            return process(new Token.EndTag().name(name));
        }
        return process(end.reset().name(name));
    }

    /**
     * Returns the element on top of the stack of open elements.
     *
     * @return the most recently pushed element, or null if the stack is empty
     */
    protected Element currentElement() {
        int size = stack.size();
        return size > 0 ? stack.get(size - 1) : null;
    }
}