001package org.json; 002 003/* 004Copyright (c) 2002 JSON.org 005 006Permission is hereby granted, free of charge, to any person obtaining a copy 007of this software and associated documentation files (the "Software"), to deal 008in the Software without restriction, including without limitation the rights 009to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 010copies of the Software, and to permit persons to whom the Software is 011furnished to do so, subject to the following conditions: 012 013The above copyright notice and this permission notice shall be included in all 014copies or substantial portions of the Software. 015 016The Software shall be used for Good, not Evil. 017 018THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 019IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 020FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 021AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 022LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 023OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 024SOFTWARE. 025*/ 026 027/** 028 * The XMLTokener extends the JSONTokener to provide additional methods 029 * for the parsing of XML texts. 030 * @author JSON.org 031 * @version 2015-12-09 032 */ 033public class XMLTokener extends JSONTokener { 034 035 036 /** The table of entity values. It initially contains Character values for 037 * amp, apos, gt, lt, quot. 038 */ 039 public static final java.util.HashMap<String, Character> entity; 040 041 static { 042 entity = new java.util.HashMap<String, Character>(8); 043 entity.put("amp", XML.AMP); 044 entity.put("apos", XML.APOS); 045 entity.put("gt", XML.GT); 046 entity.put("lt", XML.LT); 047 entity.put("quot", XML.QUOT); 048 } 049 050 /** 051 * Construct an XMLTokener from a string. 052 * @param s A source string. 053 */ 054 public XMLTokener(String s) { 055 super(s); 056 } 057 058 /** 059 * Get the text in the CDATA block. 060 * @return The string up to the <code>]]></code>. 061 * @throws JSONException If the <code>]]></code> is not found. 062 */ 063 public String nextCDATA() throws JSONException { 064 char c; 065 int i; 066 StringBuilder sb = new StringBuilder(); 067 for (;;) { 068 c = next(); 069 if (end()) { 070 throw syntaxError("Unclosed CDATA"); 071 } 072 sb.append(c); 073 i = sb.length() - 3; 074 if (i >= 0 && sb.charAt(i) == ']' && 075 sb.charAt(i + 1) == ']' && sb.charAt(i + 2) == '>') { 076 sb.setLength(i); 077 return sb.toString(); 078 } 079 } 080 } 081 082 083 /** 084 * Get the next XML outer token, trimming whitespace. There are two kinds 085 * of tokens: the '<' character which begins a markup tag, and the content 086 * text between markup tags. 087 * 088 * @return A string, or a '<' Character, or null if there is no more 089 * source text. 090 * @throws JSONException 091 */ 092 public Object nextContent() throws JSONException { 093 char c; 094 StringBuilder sb; 095 do { 096 c = next(); 097 } while (Character.isWhitespace(c)); 098 if (c == 0) { 099 return null; 100 } 101 if (c == '<') { 102 return XML.LT; 103 } 104 sb = new StringBuilder(); 105 for (;;) { 106 if (c == '<' || c == 0) { 107 back(); 108 return sb.toString().trim(); 109 } 110 if (c == '&') { 111 sb.append(nextEntity(c)); 112 } else { 113 sb.append(c); 114 } 115 c = next(); 116 } 117 } 118 119 120 /** 121 * Return the next entity. These entities are translated to Characters: 122 * <code>& ' > < "</code>. 123 * @param ampersand An ampersand character. 124 * @return A Character or an entity String if the entity is not recognized. 125 * @throws JSONException If missing ';' in XML entity. 126 */ 127 public Object nextEntity(char ampersand) throws JSONException { 128 StringBuilder sb = new StringBuilder(); 129 for (;;) { 130 char c = next(); 131 if (Character.isLetterOrDigit(c) || c == '#') { 132 sb.append(Character.toLowerCase(c)); 133 } else if (c == ';') { 134 break; 135 } else { 136 throw syntaxError("Missing ';' in XML entity: &" + sb); 137 } 138 } 139 String string = sb.toString(); 140 Object object = entity.get(string); 141 return object != null ? object : ampersand + string + ";"; 142 } 143 144 145 /** 146 * Returns the next XML meta token. This is used for skipping over <!...> 147 * and <?...?> structures. 148 * @return Syntax characters (<code>< > / = ! ?</code>) are returned as 149 * Character, and strings and names are returned as Boolean. We don't care 150 * what the values actually are. 151 * @throws JSONException If a string is not properly closed or if the XML 152 * is badly structured. 153 */ 154 public Object nextMeta() throws JSONException { 155 char c; 156 char q; 157 do { 158 c = next(); 159 } while (Character.isWhitespace(c)); 160 switch (c) { 161 case 0: 162 throw syntaxError("Misshaped meta tag"); 163 case '<': 164 return XML.LT; 165 case '>': 166 return XML.GT; 167 case '/': 168 return XML.SLASH; 169 case '=': 170 return XML.EQ; 171 case '!': 172 return XML.BANG; 173 case '?': 174 return XML.QUEST; 175 case '"': 176 case '\'': 177 q = c; 178 for (;;) { 179 c = next(); 180 if (c == 0) { 181 throw syntaxError("Unterminated string"); 182 } 183 if (c == q) { 184 return Boolean.TRUE; 185 } 186 } 187 default: 188 for (;;) { 189 c = next(); 190 if (Character.isWhitespace(c)) { 191 return Boolean.TRUE; 192 } 193 switch (c) { 194 case 0: 195 case '<': 196 case '>': 197 case '/': 198 case '=': 199 case '!': 200 case '?': 201 case '"': 202 case '\'': 203 back(); 204 return Boolean.TRUE; 205 } 206 } 207 } 208 } 209 210 211 /** 212 * Get the next XML Token. These tokens are found inside of angle 213 * brackets. It may be one of these characters: <code>/ > = ! ?</code> or it 214 * may be a string wrapped in single quotes or double quotes, or it may be a 215 * name. 216 * @return a String or a Character. 217 * @throws JSONException If the XML is not well formed. 218 */ 219 public Object nextToken() throws JSONException { 220 char c; 221 char q; 222 StringBuilder sb; 223 do { 224 c = next(); 225 } while (Character.isWhitespace(c)); 226 switch (c) { 227 case 0: 228 throw syntaxError("Misshaped element"); 229 case '<': 230 throw syntaxError("Misplaced '<'"); 231 case '>': 232 return XML.GT; 233 case '/': 234 return XML.SLASH; 235 case '=': 236 return XML.EQ; 237 case '!': 238 return XML.BANG; 239 case '?': 240 return XML.QUEST; 241 242// Quoted string 243 244 case '"': 245 case '\'': 246 q = c; 247 sb = new StringBuilder(); 248 for (;;) { 249 c = next(); 250 if (c == 0) { 251 throw syntaxError("Unterminated string"); 252 } 253 if (c == q) { 254 return sb.toString(); 255 } 256 if (c == '&') { 257 sb.append(nextEntity(c)); 258 } else { 259 sb.append(c); 260 } 261 } 262 default: 263 264// Name 265 266 sb = new StringBuilder(); 267 for (;;) { 268 sb.append(c); 269 c = next(); 270 if (Character.isWhitespace(c)) { 271 return sb.toString(); 272 } 273 switch (c) { 274 case 0: 275 return sb.toString(); 276 case '>': 277 case '/': 278 case '=': 279 case '!': 280 case '?': 281 case '[': 282 case ']': 283 back(); 284 return sb.toString(); 285 case '<': 286 case '"': 287 case '\'': 288 throw syntaxError("Bad character in a name"); 289 } 290 } 291 } 292 } 293 294 295 /** 296 * Skip characters until past the requested string. 297 * If it is not found, we are left at the end of the source with a result of false. 298 * @param to A string to skip past. 299 * @throws JSONException 300 */ 301 public boolean skipPast(String to) throws JSONException { 302 boolean b; 303 char c; 304 int i; 305 int j; 306 int offset = 0; 307 int length = to.length(); 308 char[] circle = new char[length]; 309 310 /* 311 * First fill the circle buffer with as many characters as are in the 312 * to string. If we reach an early end, bail. 313 */ 314 315 for (i = 0; i < length; i += 1) { 316 c = next(); 317 if (c == 0) { 318 return false; 319 } 320 circle[i] = c; 321 } 322 323 /* We will loop, possibly for all of the remaining characters. */ 324 325 for (;;) { 326 j = offset; 327 b = true; 328 329 /* Compare the circle buffer with the to string. */ 330 331 for (i = 0; i < length; i += 1) { 332 if (circle[j] != to.charAt(i)) { 333 b = false; 334 break; 335 } 336 j += 1; 337 if (j >= length) { 338 j -= length; 339 } 340 } 341 342 /* If we exit the loop with b intact, then victory is ours. */ 343 344 if (b) { 345 return true; 346 } 347 348 /* Get the next character. If there isn't one, then defeat is ours. */ 349 350 c = next(); 351 if (c == 0) { 352 return false; 353 } 354 /* 355 * Shove the character in the circle buffer and advance the 356 * circle offset. The offset is mod n. 357 */ 358 circle[offset] = c; 359 offset += 1; 360 if (offset >= length) { 361 offset -= length; 362 } 363 } 364 } 365}