001package au.com.bytecode.opencsv;
002
003/**
004 Copyright 2005 Bytecode Pty Ltd.
005
006 Licensed under the Apache License, Version 2.0 (the "License");
007 you may not use this file except in compliance with the License.
008 You may obtain a copy of the License at
009
010 http://www.apache.org/licenses/LICENSE-2.0
011
012 Unless required by applicable law or agreed to in writing, software
013 distributed under the License is distributed on an "AS IS" BASIS,
014 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 See the License for the specific language governing permissions and
016 limitations under the License.
017 */
018
019import java.io.BufferedReader;
020import java.io.IOException;
021import java.io.Reader;
022import java.util.ArrayList;
023import java.util.List;
024
025/**
026 * A very simple CSV reader released under a commercial-friendly license.
027 * 
028 * @author Glen Smith
029 * 
030 */
public class CSVReader {

    /** The default separator to use if none is supplied to the constructor. */
    public static final char DEFAULT_SEPARATOR = ',';

    /**
     * The default quote character to use if none is supplied to the
     * constructor.
     */
    public static final char DEFAULT_QUOTE_CHARACTER = '"';

    /**
     * The default number of leading lines to skip before reading records.
     */
    public static final int DEFAULT_SKIP_LINES = 0;

    /** Buffered view of the caller-supplied reader; read line by line. */
    private final BufferedReader br;

    /** Character that separates entries within a line. */
    private final char separator;

    /** Character that opens and closes a quoted entry. */
    private final char quotechar;

    /** Number of physical lines discarded before the first record is read. */
    private final int skipLines;

    /** Becomes false once the underlying reader has reported end of input. */
    private boolean hasNext = true;

    /** True once the initial {@link #skipLines} lines have been discarded. */
    private boolean linesSkipped;

    /**
     * Constructs CSVReader using a comma for the separator.
     * 
     * @param reader
     *            the reader to an underlying CSV source.
     */
    public CSVReader(Reader reader) {
        this(reader, DEFAULT_SEPARATOR);
    }

    /**
     * Constructs CSVReader with supplied separator.
     * 
     * @param reader
     *            the reader to an underlying CSV source.
     * @param separator
     *            the delimiter to use for separating entries.
     */
    public CSVReader(Reader reader, char separator) {
        this(reader, separator, DEFAULT_QUOTE_CHARACTER);
    }

    /**
     * Constructs CSVReader with supplied separator and quote char.
     * 
     * @param reader
     *            the reader to an underlying CSV source.
     * @param separator
     *            the delimiter to use for separating entries
     * @param quotechar
     *            the character to use for quoted elements
     */
    public CSVReader(Reader reader, char separator, char quotechar) {
        this(reader, separator, quotechar, DEFAULT_SKIP_LINES);
    }

    /**
     * Constructs CSVReader with supplied separator, quote char and number of
     * leading lines to skip.
     * 
     * @param reader
     *            the reader to an underlying CSV source.
     * @param separator
     *            the delimiter to use for separating entries
     * @param quotechar
     *            the character to use for quoted elements
     * @param line
     *            the number of lines to discard from the start of the source
     *            before the first record is read; zero or a negative value
     *            skips nothing
     */
    public CSVReader(Reader reader, char separator, char quotechar, int line) {
        this.br = new BufferedReader(reader);
        this.separator = separator;
        this.quotechar = quotechar;
        this.skipLines = line;
    }

    /**
     * Reads the remainder of the source into a List with each element being a
     * String[] of tokens.
     * 
     * @return a List of String[], with each String[] representing one record
     *         of the source; empty if nothing is left to read.
     * 
     * @throws IOException
     *             if bad things happen during the read
     */
    public List readAll() throws IOException {
        // Raw List is kept on purpose: it is the published return type.
        List allElements = new ArrayList();
        while (hasNext) {
            String[] nextLineAsTokens = readNext();
            if (nextLineAsTokens != null) {
                allElements.add(nextLineAsTokens);
            }
        }
        return allElements;
    }

    /**
     * Reads the next record from the buffer and converts it to a string array.
     * A record spans several physical lines when a quoted entry contains line
     * breaks.
     * 
     * @return a string array with each separated element as a separate entry,
     *         or null once the end of the source has been reached.
     * 
     * @throws IOException
     *             if bad things happen during the read
     */
    public String[] readNext() throws IOException {
        String nextLine = getNextLine();
        if (nextLine == null) {
            return null;
        }
        return parseLine(nextLine);
    }

    /**
     * Reads the next physical line from the source, discarding the configured
     * number of leading lines on the first call.
     * 
     * @return the next line without its trailing newline, or null at end of
     *         input (in which case {@code hasNext} is cleared)
     * @throws IOException
     *             if bad things happen during the read
     */
    private String getNextLine() throws IOException {
        if (!linesSkipped) {
            for (int i = 0; i < skipLines; i++) {
                br.readLine();
            }
            linesSkipped = true;
        }
        String nextLine = br.readLine();
        if (nextLine == null) {
            hasNext = false;
        }
        return hasNext ? nextLine : null;
    }

    /**
     * Parses an incoming line and returns an array of elements. While a quoted
     * entry is still open at the end of a line, further lines are pulled from
     * the source and joined with a newline. If the source ends while a quote is
     * still open, the text gathered so far (including the re-appended newline)
     * is returned as the last element.
     * 
     * @param nextLine
     *            the string to parse
     * @return the tokenized list of elements, or null if nextLine is null
     * @throws IOException
     *             if bad things happen during the read
     */
    private String[] parseLine(String nextLine) throws IOException {
        if (nextLine == null) {
            return null;
        }

        List tokensOnThisLine = new ArrayList();
        StringBuffer sb = new StringBuffer();
        boolean inQuotes = false;
        do {
            if (inQuotes) {
                // continuing a quoted section, reappend newline
                sb.append("\n");
                nextLine = getNextLine();
                if (nextLine == null) {
                    break;
                }
            }
            for (int i = 0; i < nextLine.length(); i++) {
                char c = nextLine.charAt(i);
                if (c == quotechar) {
                    if (inQuotes && isFollowedByQuote(nextLine, i)) {
                        // two quote chars in a row inside quotes == one literal
                        // quote: consume both, emit one, stay inside the quotes.
                        sb.append(c);
                        i++;
                    } else {
                        inQuotes = !inQuotes;
                        // the tricky case of an embedded quote in the middle
                        // of an entry (a,bc"d"ef,g): keep the quote character.
                        if (isEmbeddedQuote(nextLine, i)) {
                            sb.append(c);
                        }
                    }
                } else if (c == separator && !inQuotes) {
                    tokensOnThisLine.add(sb.toString());
                    sb = new StringBuffer(); // start work on next token
                } else {
                    sb.append(c);
                }
            }
        } while (inQuotes);
        tokensOnThisLine.add(sb.toString());
        return (String[]) tokensOnThisLine.toArray(new String[0]);
    }

    /**
     * Tells whether the character right after the given position is a quote
     * character.
     * 
     * @param line
     *            the line being parsed
     * @param index
     *            position of the quote character under inspection
     * @return true if a following character exists and is the quote character
     */
    private boolean isFollowedByQuote(String line, int index) {
        return line.length() > (index + 1) && line.charAt(index + 1) == quotechar;
    }

    /**
     * Tells whether the quote at the given position sits in the middle of an
     * entry: it is not the first character of the line, is not directly
     * preceded by a separator, and is directly followed by a character other
     * than a separator. The position is judged relative to its neighbours only,
     * so the outcome does not depend on how far into the line the entry starts.
     * 
     * @param line
     *            the line being parsed
     * @param index
     *            position of the quote character under inspection
     * @return true if the quote should be kept as literal text
     */
    private boolean isEmbeddedQuote(String line, int index) {
        return index > 0
                && line.charAt(index - 1) != separator
                && line.length() > (index + 1)
                && line.charAt(index + 1) != separator;
    }

    /**
     * Closes the underlying reader.
     * 
     * @throws IOException if the close fails
     */
    public void close() throws IOException {
        br.close();
    }

}