001/* 002 * gnu/regexp/RETokenWordBoundary.java 003 * Copyright (C) 2001 Wes Biggs 004 * 005 * This library is free software; you can redistribute it and/or modify 006 * it under the terms of the GNU Lesser General Public License as published 007 * by the Free Software Foundation; either version 2.1 of the License, or 008 * (at your option) any later version. 009 * 010 * This library is distributed in the hope that it will be useful, 011 * but WITHOUT ANY WARRANTY; without even the implied warranty of 012 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 013 * GNU Lesser General Public License for more details. 014 * 015 * You should have received a copy of the GNU Lesser General Public License 016 * along with this program; if not, write to the Free Software 017 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 018 */ 019 020package gnu.regexp; 021 022/** 023 * Represents a combination lookahead/lookbehind for POSIX [:alnum:]. 024 */ 025final class RETokenWordBoundary extends REToken { 026 private boolean negated; 027 private int where; 028 static final int BEGIN = 1; 029 static final int END = 2; 030 031 RETokenWordBoundary(int subIndex, int where, boolean negated) { 032 super(subIndex); 033 this.where = where; 034 this.negated = negated; 035 } 036 037 boolean match(CharIndexed input, REMatch mymatch) { 038 // Word boundary means input[index-1] was a word character 039 // and input[index] is not, or input[index] is a word character 040 // and input[index-1] was not 041 // In the string "one two three", these positions match: 042 // |o|n|e| |t|w|o| |t|h|r|e|e| 043 // ^ ^ ^ ^ ^ ^ 044 boolean after = false; // is current character a letter or digit? 045 boolean before = false; // is previous character a letter or digit? 046 char ch; 047 048 // TODO: Also check REG_ANCHORINDEX vs. anchor 049 if (((mymatch.eflags & RE.REG_ANCHORINDEX) != RE.REG_ANCHORINDEX) 050 || (mymatch.offset + mymatch.index > mymatch.anchor)) { 051 if ((ch = input.charAt(mymatch.index - 1)) != CharIndexed.OUT_OF_BOUNDS) { 052 before = Character.isLetterOrDigit(ch) || (ch == '_'); 053 } 054 } 055 056 if ((ch = input.charAt(mymatch.index)) != CharIndexed.OUT_OF_BOUNDS) { 057 after = Character.isLetterOrDigit(ch) || (ch == '_'); 058 } 059 060 // if (before) and (!after), we're at end (\>) 061 // if (after) and (!before), we're at beginning (\<) 062 boolean doNext = false; 063 064 if ((where & BEGIN) == BEGIN) { 065 doNext = after && !before; 066 } 067 if ((where & END) == END) { 068 doNext ^= before && !after; 069 } 070 071 if (negated) doNext = !doNext; 072 073 return (doNext ? next(input, mymatch) : false); 074 } 075 076 void dump(StringBuffer os) { 077 if (where == (BEGIN | END)) { 078 os.append( negated ? "\\B" : "\\b" ); 079 } else if (where == BEGIN) { 080 os.append("\\<"); 081 } else { 082 os.append("\\>"); 083 } 084 } 085}