001/** 002 * RenameWand 2.2 003 * Copyright 2007 Zach Scrivena 004 * 2007-12-09 005 * zachscrivena@gmail.com 006 * http://renamewand.sourceforge.net/ 007 * 008 * RenameWand is a simple command-line utility for renaming files or 009 * directories using an intuitive but powerful syntax. 010 * 011 * TERMS AND CONDITIONS: 012 * This program is free software: you can redistribute it and/or modify 013 * it under the terms of the GNU General Public License as published by 014 * the Free Software Foundation, either version 3 of the License, or 015 * (at your option) any later version. 016 * 017 * This program is distributed in the hope that it will be useful, 018 * but WITHOUT ANY WARRANTY; without even the implied warranty of 019 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 020 * GNU General Public License for more details. 021 * 022 * You should have received a copy of the GNU General Public License 023 * along with this program. If not, see <http://www.gnu.org/licenses/>. 024 */ 025 026package ca.bc.webarts.tools.renamewand; 027 028import java.text.NumberFormat; 029import java.util.regex.Pattern; 030import java.util.regex.Matcher; 031import java.util.ArrayList; 032import java.util.List; 033 034 035/** 036 * Simple class for manipulating strings. 037 */ 038class StringManipulator 039{ 040 /** default regex delimiter pattern */ 041 private static final String defaultRegexDelim = "[\\s]++"; 042 043 044 /** 045 * Tokenize the given string, using the specified regex delimiter pattern. 046 * 047 * @param in 048 * String to be tokenized 049 * @param regexDelim 050 * Regex delimiter pattern 051 * @param includeDelim 052 * If true, then delimiter tokens are returned too; 053 * otherwise, only non-delimiter tokens are returned 054 * @return 055 * Tokens in an array of strings; 056 * null, if the string to be tokenized is null 057 */ 058 public static Token[] tokenize( 059 final String in, 060 final String regexDelim, 061 final boolean includeDelimiters) 062 { 063 /* null input string */ 064 if (in == null) 065 return null; 066 067 /* regex matcher for delimiter */ 068 final Matcher delimiterMatcher = (regexDelim == null) ? 069 Pattern.compile(defaultRegexDelim).matcher(in) : 070 Pattern.compile(regexDelim).matcher(in); 071 072 /* return value */ 073 final List<Token> tokens = new ArrayList<Token>(); 074 075 /* initialize buffer string */ 076 StringBuilder buffer = new StringBuilder(); 077 078 /* parse each character in input string */ 079 for (int i = 0; i < in.length(); i++) 080 { 081 delimiterMatcher.region(i, in.length()); 082 083 if (delimiterMatcher.lookingAt()) 084 { 085 /* found delimiter match starting at this index */ 086 087 /* add buffer string to tokens if nonempty */ 088 if (buffer.length() > 0) 089 { 090 tokens.add(new Token(buffer.toString(), false)); 091 buffer = new StringBuilder(); 092 } 093 094 if (includeDelimiters) 095 tokens.add(new Token(delimiterMatcher.group(), true)); 096 097 /* advance index by length of the delimiter match */ 098 i += delimiterMatcher.group().length() - 1; 099 } 100 else 101 { 102 /* not a match at this index, so we add the char */ 103 /* to the buffer string */ 104 buffer.append(in.charAt(i)); 105 } 106 } 107 108 /* flush buffer string if nonempty */ 109 if (buffer.length() > 0) 110 tokens.add(new Token(buffer.toString(), false)); 111 112 /* return value */ 113 return tokens.toArray(new Token[tokens.size()]); 114 } 115 116 117 /** 118 * Inner class for representing a token 119 */ 120 public static class Token 121 { 122 public String val; 123 public boolean isDelimiter; 124 125 public Token(String val, boolean isDelimiter) 126 { 127 this.val = val; 128 this.isDelimiter = isDelimiter; 129 } 130 } 131 132 133 /** 134 * Extract a substring from a given string, according to the specified 135 * format. 136 * 137 * @param in 138 * String from which the substring is to be extracted 139 * @param format 140 * Format string describing the sequence of characters in the substring 141 * @param rangeChar 142 * Range character to be used 143 * @param delimChar 144 * Delimiter character to be used 145 * @return 146 * Substring of the given string 147 */ 148 public static String substring( 149 final String in, 150 final String format, 151 final char rangeChar, 152 final char delimChar) 153 { 154 /* length of input string */ 155 final int len = in.length(); 156 157 /* nothing to do for empty string */ 158 if (len == 0) return in; 159 160 /* tokenize format string by delimiter character, e.g. ',' */ 161 final String[] tokens = format.split("[\\" + delimChar + "]++"); 162 163 /* return value */ 164 final StringBuilder out = new StringBuilder(); 165 166 /* Regex pattern for nonzero integers */ 167 final Pattern nonZeroIntegerPattern = 168 Pattern.compile("\\s*([\\+\\-]?[1-9][0-9]*)\\s*"); 169 170 /* process each token */ 171 for (int i = 0; i < tokens.length; i++) 172 { 173 /* split betwen range characters, e.g. ':' */ 174 final String[] entries = tokens[i].split("[\\" + rangeChar + "]++"); 175 176 int[] indices = new int[entries.length]; 177 178 /* check if entries are all non-zero integers */ 179 for (int j = 0; j < entries.length; j++) 180 { 181 final String entry = entries[j]; 182 183 if (nonZeroIntegerPattern.matcher(entry).matches()) 184 { 185 /* convert to int */ 186 indices[j] = Integer.parseInt(entry); 187 } 188 else 189 { 190 /* error; not a nonzero integer */ 191 return null; 192 } 193 } 194 195 if (indices.length == 1) 196 { 197 /* format "a" */ 198 indices[0] = normalizeIndex(len, indices[0]); 199 out.append(in.charAt(indices[0] - 1)); 200 } 201 else if (indices.length == 2) 202 { 203 /* format "a:b" */ 204 indices[0] = normalizeIndex(len, indices[0]); 205 indices[1] = normalizeIndex(len, indices[1]); 206 final int delta = (indices[0] <= indices[1]) ? 1 : -1; 207 208 for (int k = indices[0]; ; k += delta) 209 { 210 if (delta > 0) 211 { 212 if (k > indices[1]) break; 213 } 214 else 215 { 216 if (k < indices[1]) break; 217 } 218 out.append(in.charAt(k - 1)); 219 } 220 } 221 else if (indices.length == 3) 222 { 223 /* format "a:b:c" */ 224 indices[0] = normalizeIndex(len, indices[0]); 225 indices[2] = normalizeIndex(len, indices[2]); 226 if ((indices[2] - indices[0]) * indices[1] < 0) continue; 227 for (int k = indices[0]; ; k += indices[1]) 228 { 229 if (indices[1] > 0) 230 { 231 if (k > indices[2]) break; 232 } 233 else 234 { 235 if (k < indices[2]) break; 236 } 237 out.append(in.charAt(k - 1)); 238 } 239 } 240 else 241 { 242 /* invalid format string */ 243 return null; 244 } 245 } 246 247 return out.toString(); 248 } 249 250 251 /** 252 * Normalize user-specified index value. 253 * 254 * @param len 255 * Length of the source string 256 * @param index 257 * User-specified index value 258 * @return 259 * Normalized index value 260 */ 261 private static int normalizeIndex( 262 final int len, 263 final int index) 264 { 265 /* clip index to [1,len] */ 266 int newIndex = index; 267 if (newIndex < 0) newIndex += (len + 1); 268 if (newIndex < 1) newIndex = 1; 269 if (newIndex > len) newIndex = len; 270 return newIndex; 271 } 272 273 274 /** 275 * Return a formatted string representation of a given long number 276 * (format is locale-sensitive). 277 * 278 * @param n 279 * Long number to be formatted 280 * @return 281 * Formatted string representation of the given long number 282 */ 283 public static String formattedLong( 284 final long n) 285 { 286 final NumberFormat nf = NumberFormat.getNumberInstance(); 287 nf.setGroupingUsed(true); 288 289 try 290 { 291 return nf.format(n); 292 } 293 catch (Exception e) 294 { 295 return (n + ""); 296 } 297 } 298 299 300 /** 301 * Return a formatted string representation of a given double number 302 * (format is locale-sensitive). 303 * 304 * @param n 305 * Double number to be formatted 306 * @return 307 * Formatted string representation of the given double number 308 */ 309 public static String formattedDouble( 310 final double n) 311 { 312 final NumberFormat nf = NumberFormat.getNumberInstance(); 313 nf.setGroupingUsed(true); 314 315 try 316 { 317 return nf.format(n); 318 } 319 catch (Exception e) 320 { 321 return (n + ""); 322 } 323 } 324 325 326 /** 327 * Center-justify the string representation of a given object, padding with 328 * leading and trailing spaces so that its length is at least the specified 329 * width. 330 * 331 * @param o 332 * Object to be center-justified 333 * @param width 334 * Width of the resulting center-justified string 335 * @return 336 * Center-justified string representation 337 */ 338 public static String centerJustify( 339 final Object o, 340 final int width) 341 { 342 final String s = o + ""; 343 final int len = s.length(); 344 final int totalSpace = width - len; 345 346 if (totalSpace <= 0) 347 return s; 348 349 final StringBuilder t = new StringBuilder(); 350 351 final int leadingSpace = totalSpace / 2; 352 353 for (int i = 0; i < leadingSpace; i++) 354 t.append(' '); 355 356 t.append(s); 357 358 for (int i = 0; i < totalSpace - leadingSpace; i++) 359 t.append(' '); 360 361 return t.toString(); 362 } 363 364 365 /** 366 * Left-justify the string representation of a given object, padding with 367 * trailing spaces so that its length is at least the specified width. 368 * 369 * @param o 370 * Object for to be left-justified 371 * @param width 372 * Width of the resulting left-justified string 373 * @return 374 * Left-justified string representation 375 */ 376 public static String leftJustify( 377 final Object o, 378 final int width) 379 { 380 final String s = o + ""; 381 final int len = s.length(); 382 final int totalSpace = width - len; 383 384 if (totalSpace <= 0) 385 return s; 386 387 final StringBuilder t = new StringBuilder(); 388 389 for (int i = 0; i < totalSpace; i++) 390 t.append(' '); 391 392 t.append(s); 393 394 return t.toString(); 395 } 396 397 398 /** 399 * Right-justify the string representation of a given object, padding with 400 * leading spaces so that its length is at least the specified width. 401 * 402 * @param o 403 * Object to be right-justified 404 * @param width 405 * Width of the resulting right-justified string 406 * @return 407 * Right-justified string representation 408 */ 409 public static String rightJustify( 410 final Object o, 411 final int width) 412 { 413 final String s = o + ""; 414 final int len = s.length(); 415 final int totalSpace = width - len; 416 417 if (totalSpace <= 0) 418 return s; 419 420 final StringBuilder t = new StringBuilder(s); 421 422 for (int i = 0; i < totalSpace; i++) 423 t.append(' '); 424 425 return t.toString(); 426 } 427 428 429 /** 430 * Repeat the string representation of a given object, a specified number 431 * of times. 432 * 433 * @param o 434 * Object to be repeated 435 * @param n 436 * Number of times to repeat 437 * @return 438 * Repeated string representation 439 */ 440 public static String repeat( 441 final Object o, 442 final int n) 443 { 444 final String s = o + ""; 445 final StringBuilder t = new StringBuilder(); 446 447 for (int i = 0; i < n; i++) 448 t.append(s); 449 450 return t.toString(); 451 } 452 453 454 /** 455 * Convert a specified string to title case, by capitalizing only the 456 * first letter of each word. 457 * 458 * @param s 459 * Input string 460 * @return 461 * Output string 462 */ 463 public static String toTitleCase( 464 final String s) 465 { 466 final Token[] tokens = tokenize(s, "[\\s\\p{Punct}]++", true); 467 final StringBuilder t = new StringBuilder(); 468 469 for (Token token : tokens) 470 { 471 if (token.val.isEmpty()) 472 continue; 473 474 if (token.isDelimiter) 475 { 476 t.append(token.val); 477 } 478 else 479 { 480 t.append(Character.toUpperCase(token.val.charAt(0))); 481 t.append(token.val.substring(1).toLowerCase()); 482 } 483 } 484 485 return t.toString(); 486 } 487 488 489 /** 490 * Abbreviate a specified string, by keeping only the first letter 491 * of each word. 492 * 493 * @param s 494 * Input string 495 * @return 496 * Output string 497 */ 498 public static String abbreviate( 499 final String s) 500 { 501 final Token[] tokens = tokenize(s, "[\\s\\p{Punct}]++", true); 502 final StringBuilder t = new StringBuilder(); 503 504 for (Token token : tokens) 505 { 506 if (token.val.isEmpty()) 507 continue; 508 509 if (token.isDelimiter) 510 { 511 t.append(token.val); 512 } 513 else 514 { 515 t.append(token.val.charAt(0)); 516 } 517 } 518 519 return t.toString(); 520 } 521 522 523 /** 524 * Reverse the string. 525 * 526 * @param s 527 * Input string 528 * @return 529 * Output string 530 */ 531 public static String reverse( 532 final String s) 533 { 534 return (new StringBuilder(s)).reverse().toString(); 535 } 536 537 538 /** 539 * Trim away whitespace on the left. 540 * 541 * @param s 542 * Input string 543 * @return 544 * Output string 545 */ 546 public static String leftTrim( 547 final String s) 548 { 549 final Matcher m = Pattern.compile("[\\s]++(.*)").matcher(s); 550 551 if (m.matches()) 552 return m.group(1); 553 554 return s; 555 } 556 557 558 /** 559 * Trim away whitespace on the right. 560 * 561 * @param s 562 * Input string 563 * @return 564 * Output string 565 */ 566 public static String rightTrim( 567 final String s) 568 { 569 return reverse(leftTrim(reverse(s))); 570 } 571 572 573 /** 574 * Delete extra whitespace in a specified string, by replacing contiguous 575 * whitespace characters with a single space. 576 * 577 * @param s 578 * Input string 579 * @return 580 * Output string 581 */ 582 public static String deleteExtraWhitespace( 583 final String s) 584 { 585 final Token[] tokens = tokenize(s, "[\\s]++", true); 586 final StringBuilder t = new StringBuilder(); 587 588 for (Token token : tokens) 589 { 590 if (token.val.isEmpty()) continue; 591 592 if (token.isDelimiter) 593 { 594 t.append(' '); 595 } 596 else 597 { 598 t.append(token.val); 599 } 600 } 601 602 return t.toString(); 603 } 604 605 606 /** 607 * Delete whitespace in a specified string, by deleting all whitespace 608 * characters. 609 * 610 * @param s 611 * Input string 612 * @return 613 * Output string 614 */ 615 public static String deleteWhitespace( 616 final String s) 617 { 618 final String[] tokens = s.split("[\\s]++"); 619 final StringBuilder t = new StringBuilder(); 620 621 for (String token : tokens) 622 t.append(token); 623 624 return t.toString(); 625 } 626 627 628 /** 629 * Delete punctuation in a specified string, by deleting all punctuation 630 * characters. 631 * 632 * @param s 633 * Input string 634 * @return 635 * Output string 636 */ 637 public static String deletePunctuation( 638 final String s) 639 { 640 final String[] tokens = s.split("[\\p{Punct}]++"); 641 final StringBuilder t = new StringBuilder(); 642 643 for (String token : tokens) 644 t.append(token); 645 646 return t.toString(); 647 } 648 649 650 /** 651 * Space out words in a specified string, by inserting a single space 652 * between concatenated words. 653 * 654 * @param s 655 * Input string 656 * @return 657 * Output string 658 */ 659 public static String spaceOutWords( 660 final String s) 661 { 662 final StringBuilder t = new StringBuilder(); 663 664 for (int i = 0; i < s.length(); i++) 665 { 666 final char c = s.charAt(i); 667 668 if (Character.isLowerCase(c) && 669 (i + 1 < s.length()) && 670 Character.isUpperCase(s.charAt(i + 1))) 671 { 672 t.append(c); 673 t.append(' '); 674 } 675 else if (Character.isUpperCase(c) && 676 (i - 1 >= 0) && 677 Character.isUpperCase(s.charAt(i - 1)) && 678 (i + 1 < s.length()) && 679 Character.isLowerCase(s.charAt(i + 1))) 680 { 681 t.append(' '); 682 t.append(c); 683 } 684 else 685 { 686 t.append(c); 687 } 688 } 689 690 return t.toString(); 691 } 692 693 694 /** 695 * Swap the case of a specified string, by converting lower case 696 * characters to upper case and vice versa. 697 * 698 * @param s 699 * Input string 700 * @return 701 * Output string 702 */ 703 public static String swapCase( 704 final String s) 705 { 706 final StringBuilder t = new StringBuilder(); 707 708 for (int i = 0; i < s.length(); i++) 709 { 710 final char c = s.charAt(i); 711 712 if (Character.isLowerCase(c)) 713 { 714 t.append(Character.toUpperCase(c)); 715 } 716 else if (Character.isUpperCase(c)) 717 { 718 t.append(Character.toLowerCase(c)); 719 } 720 else 721 { 722 t.append(c); 723 } 724 } 725 726 return t.toString(); 727 } 728}