package au.com.bytecode.opencsv;

/**
 Copyright 2005 Bytecode Pty Ltd.

 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at

 http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 */

import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.List;

/**
 * A very simple CSV reader released under a commercial-friendly license.
 *
 * <p>The reader wraps the supplied {@link Reader} in a {@link BufferedReader}
 * and splits each physical line into tokens on the configured separator.
 * Text enclosed in the quote character may contain separators, doubled quote
 * characters (read as one literal quote) and line breaks.
 *
 * @author Glen Smith
 *
 */
public class CSVReader {

    private final BufferedReader br;

    /** Becomes false once the underlying reader has returned end of stream. */
    private boolean hasNext = true;

    private final char separator;

    private final char quotechar;

    /** Number of leading lines discarded before the first record is read. */
    private final int skipLines;

    /** True once the leading {@link #skipLines} lines have been discarded. */
    private boolean linesSkipped;

    /** The default separator to use if none is supplied to the constructor. */
    public static final char DEFAULT_SEPARATOR = ',';

    /**
     * The default quote character to use if none is supplied to the
     * constructor.
     */
    public static final char DEFAULT_QUOTE_CHARACTER = '"';

    /**
     * The default number of lines to skip before reading starts.
     */
    public static final int DEFAULT_SKIP_LINES = 0;

    /**
     * Constructs CSVReader using a comma for the separator.
     *
     * @param reader
     *            the reader to an underlying CSV source.
     */
    public CSVReader(Reader reader) {
        this(reader, DEFAULT_SEPARATOR);
    }

    /**
     * Constructs CSVReader with supplied separator.
     *
     * @param reader
     *            the reader to an underlying CSV source.
     * @param separator
     *            the delimiter to use for separating entries.
     */
    public CSVReader(Reader reader, char separator) {
        this(reader, separator, DEFAULT_QUOTE_CHARACTER);
    }

    /**
     * Constructs CSVReader with supplied separator and quote char.
     *
     * @param reader
     *            the reader to an underlying CSV source.
     * @param separator
     *            the delimiter to use for separating entries
     * @param quotechar
     *            the character to use for quoted elements
     */
    public CSVReader(Reader reader, char separator, char quotechar) {
        this(reader, separator, quotechar, DEFAULT_SKIP_LINES);
    }

    /**
     * Constructs CSVReader with supplied separator, quote char and number of
     * leading lines to skip.
     *
     * @param reader
     *            the reader to an underlying CSV source.
     * @param separator
     *            the delimiter to use for separating entries
     * @param quotechar
     *            the character to use for quoted elements
     * @param line
     *            the number of lines to read and discard before the first
     *            record; values of zero or less skip nothing
     */
    public CSVReader(Reader reader, char separator, char quotechar, int line) {
        this.br = new BufferedReader(reader);
        this.separator = separator;
        this.quotechar = quotechar;
        this.skipLines = line;
    }

    /**
     * Reads the entire file into a List with each element being a String[] of
     * tokens.
     *
     * @return a List of String[], with each String[] representing a line of the
     *         file. The list is untyped; every element is a String[].
     *
     * @throws IOException
     *             if bad things happen during the read
     */
    public List readAll() throws IOException {
        List allElements = new ArrayList();
        while (hasNext) {
            String[] nextLineAsTokens = readNext();
            // readNext() yields null exactly once, when end of stream is hit.
            if (nextLineAsTokens != null) {
                allElements.add(nextLineAsTokens);
            }
        }
        return allElements;
    }

    /**
     * Reads the next line from the buffer and converts to a string array.
     *
     * @return a string array with each separated element as a separate
     *         entry, or null when the end of the input has been reached.
     *
     * @throws IOException
     *             if bad things happen during the read
     */
    public String[] readNext() throws IOException {
        String nextLine = getNextLine();
        return hasNext ? parseLine(nextLine) : null;
    }

    /**
     * Reads the next line from the file, discarding the configured number of
     * leading lines on the first call.
     *
     * @return the next line from the file without trailing newline, or null
     *         at end of stream
     * @throws IOException
     *             if bad things happen during the read
     */
    private String getNextLine() throws IOException {
        if (!this.linesSkipped) {
            for (int i = 0; i < skipLines; i++) {
                br.readLine();
            }
            this.linesSkipped = true;
        }
        String nextLine = br.readLine();
        if (nextLine == null) {
            hasNext = false;
        }
        return hasNext ? nextLine : null;
    }

    /**
     * Parses an incoming String and returns an array of elements. If a quoted
     * section is still open at the end of the line, further lines are pulled
     * from the reader and joined with "\n" until the quote closes or the
     * input ends.
     *
     * @param nextLine
     *            the string to parse
     * @return the tokenized list of elements, or null if nextLine is null
     * @throws IOException if bad things happen during the read
     */
    private String[] parseLine(String nextLine) throws IOException {
        if (nextLine == null) {
            return null;
        }

        List tokensOnThisLine = new ArrayList();
        StringBuffer sb = new StringBuffer();
        boolean inQuotes = false;
        do {
            if (inQuotes) {
                // continuing a quoted section, reappend newline
                sb.append("\n");
                nextLine = getNextLine();
                if (nextLine == null) {
                    // input ended inside a quoted section; emit what we have
                    break;
                }
            }
            for (int i = 0; i < nextLine.length(); i++) {
                char c = nextLine.charAt(i);
                if (c == quotechar) {
                    // The quote may end a quoted block, or escape another
                    // quote, so do a 1-char lookahead.
                    if (inQuotes && isFollowedByQuote(nextLine, i)) {
                        // Two quote chars in a row == one quote char: consume
                        // both, put one on the token, stay inside the quotes.
                        sb.append(quotechar);
                        i++;
                    } else {
                        inQuotes = !inQuotes;
                        // the tricky case of an embedded quote in the middle: a,bc"d"ef,g
                        if (isEmbeddedQuote(nextLine, i)) {
                            sb.append(c);
                        }
                    }
                } else if (c == separator && !inQuotes) {
                    tokensOnThisLine.add(sb.toString());
                    sb = new StringBuffer(); // start work on next token
                } else {
                    sb.append(c);
                }
            }
        } while (inQuotes);
        tokensOnThisLine.add(sb.toString());
        return (String[]) tokensOnThisLine.toArray(new String[0]);
    }

    /**
     * Tells whether the character right after position i exists and is the
     * quote character.
     */
    private boolean isFollowedByQuote(String line, int i) {
        return line.length() > (i + 1) && line.charAt(i + 1) == quotechar;
    }

    /**
     * Tells whether the quote at position i sits in the middle of a token
     * (neither directly after nor directly before a separator, and not the
     * last character) and therefore must be kept as a literal character.
     *
     * NOTE(review): the i > 2 guard means a quote at index 0, 1 or 2 is never
     * kept as a literal, so a"b"c parses to ab"c. The guard is reproduced
     * unchanged; confirm whether i > 0 was intended before altering it.
     */
    private boolean isEmbeddedQuote(String line, int i) {
        return i > 2
                && line.charAt(i - 1) != this.separator
                && line.length() > (i + 1)
                && line.charAt(i + 1) != this.separator;
    }

    /**
     * Closes the underlying reader.
     *
     * @throws IOException if the close fails
     */
    public void close() throws IOException {
        br.close();
    }

}