001/*
002 *  gnu/regexp/RETokenWordBoundary.java
003 *  Copyright (C) 2001 Wes Biggs
004 *
005 *  This library is free software; you can redistribute it and/or modify
006 *  it under the terms of the GNU Lesser General Public License as published
007 *  by the Free Software Foundation; either version 2.1 of the License, or
008 *  (at your option) any later version.
009 *
010 *  This library is distributed in the hope that it will be useful,
011 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
012 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
013 *  GNU Lesser General Public License for more details.
014 *
015 *  You should have received a copy of the GNU Lesser General Public License
016 *  along with this program; if not, write to the Free Software
017 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
018 */
019
020package gnu.regexp;
021
022/**
023 * Represents a combination lookahead/lookbehind for POSIX [:alnum:].
024 */
025final class RETokenWordBoundary extends REToken {
026    private boolean negated;
027    private int where;
028    static final int BEGIN = 1;
029    static final int END = 2;
030
031    RETokenWordBoundary(int subIndex, int where, boolean negated) {
032        super(subIndex);
033        this.where = where;
034        this.negated = negated;
035    }
036    
037    boolean match(CharIndexed input, REMatch mymatch) {
038        // Word boundary means input[index-1] was a word character
039        // and input[index] is not, or input[index] is a word character
040        // and input[index-1] was not
041        //  In the string "one two three", these positions match:
042        //  |o|n|e| |t|w|o| |t|h|r|e|e|
043        //  ^     ^ ^     ^ ^         ^
044        boolean after = false;  // is current character a letter or digit?
045        boolean before = false; // is previous character a letter or digit?
046        char ch;
047
048        // TODO: Also check REG_ANCHORINDEX vs. anchor
049        if (((mymatch.eflags & RE.REG_ANCHORINDEX) != RE.REG_ANCHORINDEX) 
050            || (mymatch.offset + mymatch.index > mymatch.anchor)) {
051            if ((ch = input.charAt(mymatch.index - 1)) != CharIndexed.OUT_OF_BOUNDS) {
052                before = Character.isLetterOrDigit(ch) || (ch == '_');
053            }
054        }
055
056        if ((ch = input.charAt(mymatch.index)) != CharIndexed.OUT_OF_BOUNDS) {
057            after = Character.isLetterOrDigit(ch) || (ch == '_');
058        }
059
060        // if (before) and (!after), we're at end (\>)
061        // if (after) and (!before), we're at beginning (\<)
062        boolean doNext = false;
063
064        if ((where & BEGIN) == BEGIN) {
065            doNext = after && !before;
066        }
067        if ((where & END) == END) {
068            doNext ^= before && !after;
069        }
070
071        if (negated) doNext = !doNext;
072
073        return (doNext ? next(input, mymatch) : false);
074    }
075    
076    void dump(StringBuffer os) {
077        if (where == (BEGIN | END)) {
078            os.append( negated ? "\\B" : "\\b" );
079        } else if (where == BEGIN) {
080            os.append("\\<");
081        } else {
082            os.append("\\>");
083        }
084    }
085}