001package org.jsoup.helper; 002 003import java.net.MalformedURLException; 004import java.net.URL; 005import java.util.Arrays; 006import java.util.Collection; 007import java.util.Iterator; 008 009/** 010 * A minimal String utility class. Designed for internal jsoup use only. 011 */ 012public final class StringUtil { 013 // memoised padding up to 21 014 static final String[] padding = {"", " ", " ", " ", " ", " ", " ", " ", " ", 015 " ", " ", " ", " ", " ", " ", " ", 016 " ", " ", " ", " ", " "}; 017 018 /** 019 * Join a collection of strings by a separator 020 * @param strings collection of string objects 021 * @param sep string to place between strings 022 * @return joined string 023 */ 024 public static String join(Collection strings, String sep) { 025 return join(strings.iterator(), sep); 026 } 027 028 /** 029 * Join a collection of strings by a separator 030 * @param strings iterator of string objects 031 * @param sep string to place between strings 032 * @return joined string 033 */ 034 public static String join(Iterator strings, String sep) { 035 if (!strings.hasNext()) 036 return ""; 037 038 String start = strings.next().toString(); 039 if (!strings.hasNext()) // only one, avoid builder 040 return start; 041 042 StringBuilder sb = new StringBuilder(64).append(start); 043 while (strings.hasNext()) { 044 sb.append(sep); 045 sb.append(strings.next()); 046 } 047 return sb.toString(); 048 } 049 050 /** 051 * Join an array of strings by a separator 052 * @param strings collection of string objects 053 * @param sep string to place between strings 054 * @return joined string 055 */ 056 public static String join(String[] strings, String sep) { 057 return join(Arrays.asList(strings), sep); 058 } 059 060 /** 061 * Returns space padding 062 * @param width amount of padding desired 063 * @return string of spaces * width 064 */ 065 public static String padding(int width) { 066 if (width < 0) 067 throw new IllegalArgumentException("width must be > 0"); 068 069 if (width < padding.length) 070 return padding[width]; 071 char[] out = new char[width]; 072 for (int i = 0; i < width; i++) 073 out[i] = ' '; 074 return String.valueOf(out); 075 } 076 077 /** 078 * Tests if a string is blank: null, empty, or only whitespace (" ", \r\n, \t, etc) 079 * @param string string to test 080 * @return if string is blank 081 */ 082 public static boolean isBlank(String string) { 083 if (string == null || string.length() == 0) 084 return true; 085 086 int l = string.length(); 087 for (int i = 0; i < l; i++) { 088 if (!StringUtil.isWhitespace(string.codePointAt(i))) 089 return false; 090 } 091 return true; 092 } 093 094 /** 095 * Tests if a string is numeric, i.e. contains only digit characters 096 * @param string string to test 097 * @return true if only digit chars, false if empty or null or contains non-digit chars 098 */ 099 public static boolean isNumeric(String string) { 100 if (string == null || string.length() == 0) 101 return false; 102 103 int l = string.length(); 104 for (int i = 0; i < l; i++) { 105 if (!Character.isDigit(string.codePointAt(i))) 106 return false; 107 } 108 return true; 109 } 110 111 /** 112 * Tests if a code point is "whitespace" as defined in the HTML spec. Used for output HTML. 113 * @param c code point to test 114 * @return true if code point is whitespace, false otherwise 115 * @see #isActuallyWhitespace(int) 116 */ 117 public static boolean isWhitespace(int c){ 118 return c == ' ' || c == '\t' || c == '\n' || c == '\f' || c == '\r'; 119 } 120 121 /** 122 * Tests if a code point is "whitespace" as defined by what it looks like. Used for Element.text etc. 123 * @param c code point to test 124 * @return true if code point is whitespace, false otherwise 125 */ 126 public static boolean isActuallyWhitespace(int c){ 127 return c == ' ' || c == '\t' || c == '\n' || c == '\f' || c == '\r' || c == 160; 128 // 160 is (non-breaking space). Not in the spec but expected. 129 } 130 131 /** 132 * Normalise the whitespace within this string; multiple spaces collapse to a single, and all whitespace characters 133 * (e.g. newline, tab) convert to a simple space 134 * @param string content to normalise 135 * @return normalised string 136 */ 137 public static String normaliseWhitespace(String string) { 138 StringBuilder sb = StringUtil.stringBuilder(); 139 appendNormalisedWhitespace(sb, string, false); 140 return sb.toString(); 141 } 142 143 /** 144 * After normalizing the whitespace within a string, appends it to a string builder. 145 * @param accum builder to append to 146 * @param string string to normalize whitespace within 147 * @param stripLeading set to true if you wish to remove any leading whitespace 148 */ 149 public static void appendNormalisedWhitespace(StringBuilder accum, String string, boolean stripLeading) { 150 boolean lastWasWhite = false; 151 boolean reachedNonWhite = false; 152 153 int len = string.length(); 154 int c; 155 for (int i = 0; i < len; i+= Character.charCount(c)) { 156 c = string.codePointAt(i); 157 if (isActuallyWhitespace(c)) { 158 if ((stripLeading && !reachedNonWhite) || lastWasWhite) 159 continue; 160 accum.append(' '); 161 lastWasWhite = true; 162 } 163 else { 164 accum.appendCodePoint(c); 165 lastWasWhite = false; 166 reachedNonWhite = true; 167 } 168 } 169 } 170 171 public static boolean in(final String needle, final String... haystack) { 172 final int len = haystack.length; 173 for (int i = 0; i < len; i++) { 174 if (haystack[i].equals(needle)) 175 return true; 176 } 177 return false; 178 } 179 180 public static boolean inSorted(String needle, String[] haystack) { 181 return Arrays.binarySearch(haystack, needle) >= 0; 182 } 183 184 /** 185 * Create a new absolute URL, from a provided existing absolute URL and a relative URL component. 186 * @param base the existing absolute base URL 187 * @param relUrl the relative URL to resolve. (If it's already absolute, it will be returned) 188 * @return the resolved absolute URL 189 * @throws MalformedURLException if an error occurred generating the URL 190 */ 191 public static URL resolve(URL base, String relUrl) throws MalformedURLException { 192 // workaround: java resolves '//path/file + ?foo' to '//path/?foo', not '//path/file?foo' as desired 193 if (relUrl.startsWith("?")) 194 relUrl = base.getPath() + relUrl; 195 // workaround: //example.com + ./foo = //example.com/./foo, not //example.com/foo 196 if (relUrl.indexOf('.') == 0 && base.getFile().indexOf('/') != 0) { 197 base = new URL(base.getProtocol(), base.getHost(), base.getPort(), "/" + base.getFile()); 198 } 199 return new URL(base, relUrl); 200 } 201 202 /** 203 * Create a new absolute URL, from a provided existing absolute URL and a relative URL component. 204 * @param baseUrl the existing absolute base URL 205 * @param relUrl the relative URL to resolve. (If it's already absolute, it will be returned) 206 * @return an absolute URL if one was able to be generated, or the empty string if not 207 */ 208 public static String resolve(final String baseUrl, final String relUrl) { 209 URL base; 210 try { 211 try { 212 base = new URL(baseUrl); 213 } catch (MalformedURLException e) { 214 // the base is unsuitable, but the attribute/rel may be abs on its own, so try that 215 URL abs = new URL(relUrl); 216 return abs.toExternalForm(); 217 } 218 return resolve(base, relUrl).toExternalForm(); 219 } catch (MalformedURLException e) { 220 return ""; 221 } 222 } 223 224 /** 225 * Maintains a cached StringBuilder, to minimize new StringBuilder GCs. Prevents it from growing to big per thread. 226 * Care must be taken to not grab more than one in the same stack (not locked or mutexed or anything). 227 * @return an empty StringBuilder 228 */ 229 public static StringBuilder stringBuilder() { 230 StringBuilder sb = stringLocal.get(); 231 if (sb.length() > MaxCachedBuilderSize) { 232 sb = new StringBuilder(MaxCachedBuilderSize); 233 stringLocal.set(sb); 234 } else { 235 sb.delete(0, sb.length()); 236 } 237 return sb; 238 239 } 240 241 private static final int MaxCachedBuilderSize = 8 * 1024; 242 private static final ThreadLocal<StringBuilder> stringLocal = new ThreadLocal<StringBuilder>(){ 243 @Override 244 protected StringBuilder initialValue() { 245 return new StringBuilder(MaxCachedBuilderSize); 246 } 247 }; 248 249 250}