001package org.json;
002
003/*
004Copyright (c) 2002 JSON.org
005
006Permission is hereby granted, free of charge, to any person obtaining a copy
007of this software and associated documentation files (the "Software"), to deal
008in the Software without restriction, including without limitation the rights
009to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
010copies of the Software, and to permit persons to whom the Software is
011furnished to do so, subject to the following conditions:
012
013The above copyright notice and this permission notice shall be included in all
014copies or substantial portions of the Software.
015
016The Software shall be used for Good, not Evil.
017
018THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
019IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
020FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
021AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
022LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
023OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
024SOFTWARE.
025*/
026
027/**
028 * The XMLTokener extends the JSONTokener to provide additional methods
029 * for the parsing of XML texts.
030 * @author JSON.org
031 * @version 2015-12-09
032 */
033public class XMLTokener extends JSONTokener {
034
035
036   /** The table of entity values. It initially contains Character values for
037    * amp, apos, gt, lt, quot.
038    */
039   public static final java.util.HashMap<String, Character> entity;
040
041   static {
042       entity = new java.util.HashMap<String, Character>(8);
043       entity.put("amp",  XML.AMP);
044       entity.put("apos", XML.APOS);
045       entity.put("gt",   XML.GT);
046       entity.put("lt",   XML.LT);
047       entity.put("quot", XML.QUOT);
048   }
049
050    /**
051     * Construct an XMLTokener from a string.
052     * @param s A source string.
053     */
054    public XMLTokener(String s) {
055        super(s);
056    }
057
058    /**
059     * Get the text in the CDATA block.
060     * @return The string up to the <code>]]&gt;</code>.
061     * @throws JSONException If the <code>]]&gt;</code> is not found.
062     */
063    public String nextCDATA() throws JSONException {
064        char         c;
065        int          i;
066        StringBuilder sb = new StringBuilder();
067        for (;;) {
068            c = next();
069            if (end()) {
070                throw syntaxError("Unclosed CDATA");
071            }
072            sb.append(c);
073            i = sb.length() - 3;
074            if (i >= 0 && sb.charAt(i) == ']' &&
075                          sb.charAt(i + 1) == ']' && sb.charAt(i + 2) == '>') {
076                sb.setLength(i);
077                return sb.toString();
078            }
079        }
080    }
081
082
083    /**
084     * Get the next XML outer token, trimming whitespace. There are two kinds
085     * of tokens: the '<' character which begins a markup tag, and the content
086     * text between markup tags.
087     *
088     * @return  A string, or a '<' Character, or null if there is no more
089     * source text.
090     * @throws JSONException
091     */
092    public Object nextContent() throws JSONException {
093        char         c;
094        StringBuilder sb;
095        do {
096            c = next();
097        } while (Character.isWhitespace(c));
098        if (c == 0) {
099            return null;
100        }
101        if (c == '<') {
102            return XML.LT;
103        }
104        sb = new StringBuilder();
105        for (;;) {
106            if (c == '<' || c == 0) {
107                back();
108                return sb.toString().trim();
109            }
110            if (c == '&') {
111                sb.append(nextEntity(c));
112            } else {
113                sb.append(c);
114            }
115            c = next();
116        }
117    }
118
119
120    /**
121     * Return the next entity. These entities are translated to Characters:
122     *     <code>&amp;  &apos;  &gt;  &lt;  &quot;</code>.
123     * @param ampersand An ampersand character.
124     * @return  A Character or an entity String if the entity is not recognized.
125     * @throws JSONException If missing ';' in XML entity.
126     */
127    public Object nextEntity(char ampersand) throws JSONException {
128        StringBuilder sb = new StringBuilder();
129        for (;;) {
130            char c = next();
131            if (Character.isLetterOrDigit(c) || c == '#') {
132                sb.append(Character.toLowerCase(c));
133            } else if (c == ';') {
134                break;
135            } else {
136                throw syntaxError("Missing ';' in XML entity: &" + sb);
137            }
138        }
139        String string = sb.toString();
140        Object object = entity.get(string);
141        return object != null ? object : ampersand + string + ";";
142    }
143
144
145    /**
146     * Returns the next XML meta token. This is used for skipping over <!...>
147     * and <?...?> structures.
148     * @return Syntax characters (<code>< > / = ! ?</code>) are returned as
149     *  Character, and strings and names are returned as Boolean. We don't care
150     *  what the values actually are.
151     * @throws JSONException If a string is not properly closed or if the XML
152     *  is badly structured.
153     */
154    public Object nextMeta() throws JSONException {
155        char c;
156        char q;
157        do {
158            c = next();
159        } while (Character.isWhitespace(c));
160        switch (c) {
161        case 0:
162            throw syntaxError("Misshaped meta tag");
163        case '<':
164            return XML.LT;
165        case '>':
166            return XML.GT;
167        case '/':
168            return XML.SLASH;
169        case '=':
170            return XML.EQ;
171        case '!':
172            return XML.BANG;
173        case '?':
174            return XML.QUEST;
175        case '"':
176        case '\'':
177            q = c;
178            for (;;) {
179                c = next();
180                if (c == 0) {
181                    throw syntaxError("Unterminated string");
182                }
183                if (c == q) {
184                    return Boolean.TRUE;
185                }
186            }
187        default:
188            for (;;) {
189                c = next();
190                if (Character.isWhitespace(c)) {
191                    return Boolean.TRUE;
192                }
193                switch (c) {
194                case 0:
195                case '<':
196                case '>':
197                case '/':
198                case '=':
199                case '!':
200                case '?':
201                case '"':
202                case '\'':
203                    back();
204                    return Boolean.TRUE;
205                }
206            }
207        }
208    }
209
210
211    /**
212     * Get the next XML Token. These tokens are found inside of angle
213     * brackets. It may be one of these characters: <code>/ > = ! ?</code> or it
214     * may be a string wrapped in single quotes or double quotes, or it may be a
215     * name.
216     * @return a String or a Character.
217     * @throws JSONException If the XML is not well formed.
218     */
219    public Object nextToken() throws JSONException {
220        char c;
221        char q;
222        StringBuilder sb;
223        do {
224            c = next();
225        } while (Character.isWhitespace(c));
226        switch (c) {
227        case 0:
228            throw syntaxError("Misshaped element");
229        case '<':
230            throw syntaxError("Misplaced '<'");
231        case '>':
232            return XML.GT;
233        case '/':
234            return XML.SLASH;
235        case '=':
236            return XML.EQ;
237        case '!':
238            return XML.BANG;
239        case '?':
240            return XML.QUEST;
241
242// Quoted string
243
244        case '"':
245        case '\'':
246            q = c;
247            sb = new StringBuilder();
248            for (;;) {
249                c = next();
250                if (c == 0) {
251                    throw syntaxError("Unterminated string");
252                }
253                if (c == q) {
254                    return sb.toString();
255                }
256                if (c == '&') {
257                    sb.append(nextEntity(c));
258                } else {
259                    sb.append(c);
260                }
261            }
262        default:
263
264// Name
265
266            sb = new StringBuilder();
267            for (;;) {
268                sb.append(c);
269                c = next();
270                if (Character.isWhitespace(c)) {
271                    return sb.toString();
272                }
273                switch (c) {
274                case 0:
275                    return sb.toString();
276                case '>':
277                case '/':
278                case '=':
279                case '!':
280                case '?':
281                case '[':
282                case ']':
283                    back();
284                    return sb.toString();
285                case '<':
286                case '"':
287                case '\'':
288                    throw syntaxError("Bad character in a name");
289                }
290            }
291        }
292    }
293
294
295    /**
296     * Skip characters until past the requested string.
297     * If it is not found, we are left at the end of the source with a result of false.
298     * @param to A string to skip past.
299     * @throws JSONException
300     */
301    public boolean skipPast(String to) throws JSONException {
302        boolean b;
303        char c;
304        int i;
305        int j;
306        int offset = 0;
307        int length = to.length();
308        char[] circle = new char[length];
309
310        /*
311         * First fill the circle buffer with as many characters as are in the
312         * to string. If we reach an early end, bail.
313         */
314
315        for (i = 0; i < length; i += 1) {
316            c = next();
317            if (c == 0) {
318                return false;
319            }
320            circle[i] = c;
321        }
322
323        /* We will loop, possibly for all of the remaining characters. */
324
325        for (;;) {
326            j = offset;
327            b = true;
328
329            /* Compare the circle buffer with the to string. */
330
331            for (i = 0; i < length; i += 1) {
332                if (circle[j] != to.charAt(i)) {
333                    b = false;
334                    break;
335                }
336                j += 1;
337                if (j >= length) {
338                    j -= length;
339                }
340            }
341
342            /* If we exit the loop with b intact, then victory is ours. */
343
344            if (b) {
345                return true;
346            }
347
348            /* Get the next character. If there isn't one, then defeat is ours. */
349
350            c = next();
351            if (c == 0) {
352                return false;
353            }
354            /*
355             * Shove the character in the circle buffer and advance the
356             * circle offset. The offset is mod n.
357             */
358            circle[offset] = c;
359            offset += 1;
360            if (offset >= length) {
361                offset -= length;
362            }
363        }
364    }
365}