001package org.jsoup.helper;
002
003import java.net.MalformedURLException;
004import java.net.URL;
005import java.util.Arrays;
006import java.util.Collection;
007import java.util.Iterator;
008
009/**
010 * A minimal String utility class. Designed for internal jsoup use only.
011 */
public final class StringUtil {
    // memoised padding strings for widths 0 to 20 (21 entries); the array index is the width
    static final String[] padding = {"", " ", "  ", "   ", "    ", "     ", "      ", "       ", "        ",
        "         ", "          ", "           ", "            ", "             ", "              ", "               ",
        "                ", "                 ", "                  ", "                   ", "                    "};

    /**
     * Join a collection of objects by a separator, using each element's string form.
     * @param strings collection of string objects
     * @param sep string to place between strings
     * @return joined string; the empty string if the collection is empty
     */
    public static String join(Collection<?> strings, String sep) {
        return join(strings.iterator(), sep);
    }

    /**
     * Join the remaining elements of an iterator by a separator, using each element's string form.
     * The first element is converted with {@code toString()}, so it must not be null.
     * @param strings iterator of string objects
     * @param sep string to place between strings
     * @return joined string; the empty string if the iterator has no elements
     */
    public static String join(Iterator<?> strings, String sep) {
        if (!strings.hasNext())
            return "";

        String start = strings.next().toString();
        if (!strings.hasNext()) // only one, avoid builder
            return start;

        StringBuilder sb = new StringBuilder(64).append(start);
        while (strings.hasNext()) {
            sb.append(sep);
            sb.append(strings.next());
        }
        return sb.toString();
    }

    /**
     * Join an array of strings by a separator
     * @param strings array of strings
     * @param sep string to place between strings
     * @return joined string
     */
    public static String join(String[] strings, String sep) {
        return join(Arrays.asList(strings), sep);
    }

    /**
     * Returns space padding
     * @param width amount of padding desired; must not be negative
     * @return string of spaces * width
     * @throws IllegalArgumentException if width is negative
     */
    public static String padding(int width) {
        if (width < 0)
            throw new IllegalArgumentException("width must be >= 0");

        // small widths are served from the memoised table
        if (width < padding.length)
            return padding[width];
        char[] out = new char[width];
        Arrays.fill(out, ' ');
        return String.valueOf(out);
    }

    /**
     * Tests if a string is blank: null, empty, or only whitespace (" ", \r\n, \t, etc)
     * @param string string to test
     * @return if string is blank
     * @see #isWhitespace(int)
     */
    public static boolean isBlank(String string) {
        if (string == null || string.length() == 0)
            return true;

        int l = string.length();
        int c;
        // step by whole code points so a surrogate pair is examined once, not once per char
        for (int i = 0; i < l; i += Character.charCount(c)) {
            c = string.codePointAt(i);
            if (!isWhitespace(c))
                return false;
        }
        return true;
    }

    /**
     * Tests if a string is numeric, i.e. contains only digit characters (as per {@link Character#isDigit(int)})
     * @param string string to test
     * @return true if only digit chars, false if empty or null or contains non-digit chars
     */
    public static boolean isNumeric(String string) {
        if (string == null || string.length() == 0)
            return false;

        int l = string.length();
        int c;
        // step by whole code points: a supplementary-plane digit spans two chars, and testing its
        // low surrogate on its own would wrongly report a non-digit
        for (int i = 0; i < l; i += Character.charCount(c)) {
            c = string.codePointAt(i);
            if (!Character.isDigit(c))
                return false;
        }
        return true;
    }

    /**
     * Tests if a code point is "whitespace" as defined in the HTML spec. Used for output HTML.
     * @param c code point to test
     * @return true if code point is whitespace, false otherwise
     * @see #isActuallyWhitespace(int)
     */
    public static boolean isWhitespace(int c){
        return c == ' ' || c == '\t' || c == '\n' || c == '\f' || c == '\r';
    }

    /**
     * Tests if a code point is "whitespace" as defined by what it looks like. Used for Element.text etc.
     * @param c code point to test
     * @return true if code point is whitespace, false otherwise
     * @see #isWhitespace(int)
     */
    public static boolean isActuallyWhitespace(int c){
        return c == ' ' || c == '\t' || c == '\n' || c == '\f' || c == '\r' || c == 160;
        // 160 is &nbsp; (non-breaking space). Not in the spec but expected.
    }

    /**
     * Normalise the whitespace within this string; multiple spaces collapse to a single, and all whitespace characters
     * (e.g. newline, tab) convert to a simple space. Leading whitespace is collapsed, not removed.
     * @param string content to normalise
     * @return normalised string
     */
    public static String normaliseWhitespace(String string) {
        StringBuilder sb = StringUtil.stringBuilder();
        appendNormalisedWhitespace(sb, string, false);
        return sb.toString();
    }

    /**
     * After normalizing the whitespace within a string, appends it to a string builder.
     * @param accum builder to append to
     * @param string string to normalize whitespace within
     * @param stripLeading set to true if you wish to remove any leading whitespace
     * @see #isActuallyWhitespace(int)
     */
    public static void appendNormalisedWhitespace(StringBuilder accum, String string, boolean stripLeading) {
        boolean lastWasWhite = false;
        boolean reachedNonWhite = false;

        int len = string.length();
        int c;
        for (int i = 0; i < len; i+= Character.charCount(c)) {
            c = string.codePointAt(i);
            if (isActuallyWhitespace(c)) {
                // drop leading whitespace (when requested) and every whitespace after the first in a run
                if ((stripLeading && !reachedNonWhite) || lastWasWhite)
                    continue;
                accum.append(' ');
                lastWasWhite = true;
            }
            else {
                accum.appendCodePoint(c);
                lastWasWhite = false;
                reachedNonWhite = true;
            }
        }
    }

    /**
     * Tests if a string equals any of the supplied candidates, checked in order.
     * @param needle string to look for
     * @param haystack candidate strings; elements must not be null
     * @return true if any candidate equals the needle, false otherwise (including when there are no candidates)
     */
    public static boolean in(final String needle, final String... haystack) {
        for (final String candidate : haystack) {
            if (candidate.equals(needle))
                return true;
        }
        return false;
    }

    /**
     * Tests if a string is present in an array, using a binary search.
     * @param needle string to look for
     * @param haystack candidate strings; must already be sorted, as required by {@link Arrays#binarySearch(Object[], Object)}
     * @return true if the needle was found
     */
    public static boolean inSorted(String needle, String[] haystack) {
        return Arrays.binarySearch(haystack, needle) >= 0;
    }

    /**
     * Create a new absolute URL, from a provided existing absolute URL and a relative URL component.
     * @param base the existing absolute base URL
     * @param relUrl the relative URL to resolve. (If it's already absolute, it will be returned)
     * @return the resolved absolute URL
     * @throws MalformedURLException if an error occurred generating the URL
     */
    public static URL resolve(URL base, String relUrl) throws MalformedURLException {
        // workaround: java resolves '//path/file + ?foo' to '//path/?foo', not '//path/file?foo' as desired
        if (relUrl.startsWith("?"))
            relUrl = base.getPath() + relUrl;
        // workaround: //example.com + ./foo = //example.com/./foo, not //example.com/foo
        if (relUrl.startsWith(".") && !base.getFile().startsWith("/")) {
            base = new URL(base.getProtocol(), base.getHost(), base.getPort(), "/" + base.getFile());
        }
        return new URL(base, relUrl);
    }

    /**
     * Create a new absolute URL, from a provided existing absolute URL and a relative URL component.
     * @param baseUrl the existing absolute base URL
     * @param relUrl the relative URL to resolve. (If it's already absolute, it will be returned)
     * @return an absolute URL if one was able to be generated, or the empty string if not
     */
    public static String resolve(final String baseUrl, final String relUrl) {
        URL base;
        try {
            try {
                base = new URL(baseUrl);
            } catch (MalformedURLException e) {
                // the base is unsuitable, but the attribute/rel may be abs on its own, so try that
                URL abs = new URL(relUrl);
                return abs.toExternalForm();
            }
            return resolve(base, relUrl).toExternalForm();
        } catch (MalformedURLException e) {
            // neither the base nor the relative URL could produce a valid URL
            return "";
        }
    }

    /**
     * Maintains a cached StringBuilder per thread, to minimize new StringBuilder GCs. Prevents it from growing too
     * big per thread: if the cached builder was left holding more than the maximum cached size, it is replaced.
     * Care must be taken to not grab more than one in the same stack (not locked or mutexed or anything).
     * @return an empty StringBuilder
     */
    public static StringBuilder stringBuilder() {
        StringBuilder sb = stringLocal.get();
        if (sb.length() > MaxCachedBuilderSize) {
            // the previous user left an oversized builder behind; swap in a fresh one so it can be collected
            sb = new StringBuilder(MaxCachedBuilderSize);
            stringLocal.set(sb);
        } else {
            sb.setLength(0); // reuse: discard whatever the previous user left in it
        }
        return sb;
    }

    // largest content length (and the initial capacity) of a builder that is kept for reuse
    private static final int MaxCachedBuilderSize = 8 * 1024;
    private static final ThreadLocal<StringBuilder> stringLocal = new ThreadLocal<StringBuilder>(){
        @Override
        protected StringBuilder initialValue() {
            return new StringBuilder(MaxCachedBuilderSize);
        }
    };
}