Source file src/pkg/encoding/csv/reader.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52 package csv
53
54 import (
55 "bufio"
56 "bytes"
57 "errors"
58 "fmt"
59 "io"
60 "unicode"
61 "unicode/utf8"
62 )
63
64
65
// A ParseError describes a problem found while parsing a record and records
// where in the input it was detected.
type ParseError struct {
	// StartLine is the line number on which the offending record began.
	StartLine int

	// Line is the line number on which the error was detected. For a
	// record spanning several lines it can differ from StartLine.
	Line int

	// Column is the position of the error within Line, counted in runes
	// from the start of that line. It is left at zero for ErrFieldCount.
	Column int

	// Err is the underlying cause, e.g. ErrBareQuote, ErrQuote or
	// ErrFieldCount.
	Err error
}
72
73 func (e *ParseError) Error() string {
74 if e.Err == ErrFieldCount {
75 return fmt.Sprintf("record on line %d: %v", e.Line, e.Err)
76 }
77 if e.StartLine != e.Line {
78 return fmt.Sprintf("record on line %d; parse error on line %d, column %d: %v", e.StartLine, e.Line, e.Column, e.Err)
79 }
80 return fmt.Sprintf("parse error on line %d, column %d: %v", e.Line, e.Column, e.Err)
81 }
82
83 func (e *ParseError) Unwrap() error { return e.Err }
84
85
// Sentinel errors that can appear in ParseError.Err.
var (
	// ErrTrailingComma is not referenced by the reader code in this file.
	// NOTE(review): presumably kept only so existing callers still compile.
	ErrTrailingComma = errors.New("extra delimiter at end of line")

	// ErrBareQuote reports a '"' inside an unquoted field while LazyQuotes
	// is disabled.
	ErrBareQuote = errors.New("bare \" in non-quoted-field")

	// ErrQuote reports a quoted field whose closing quote is misplaced or
	// missing while LazyQuotes is disabled.
	ErrQuote = errors.New("extraneous or missing \" in quoted-field")

	// ErrFieldCount reports a record whose number of fields differs from a
	// positive Reader.FieldsPerRecord.
	ErrFieldCount = errors.New("wrong number of fields")
)

// errInvalidDelim is returned, unwrapped, when Reader.Comma or Reader.Comment
// is not an acceptable delimiter or when the two are equal.
var errInvalidDelim = errors.New("csv: invalid field or comment delimiter")
94
// validDelim reports whether r may be used as a field or comment delimiter.
// NUL, the double quote, carriage return, line feed and the Unicode
// replacement character are rejected, as is anything that is not a valid rune.
func validDelim(r rune) bool {
	switch r {
	case 0, '"', '\r', '\n', utf8.RuneError:
		return false
	}
	return utf8.ValidRune(r)
}
98
99
100
101
102
103
104
105
106
107
// A Reader reads records from CSV-encoded input.
//
// The exported fields configure parsing and are consulted each time a record
// is read. The unexported fields hold the input source and scratch buffers
// that are reused between records.
type Reader struct {
	// Comma is the field delimiter. NewReader sets it to ','. It must be
	// accepted by validDelim and must differ from Comment; otherwise reading
	// fails with an invalid-delimiter error.
	Comma rune

	// Comment, when non-zero, marks comment lines: a line whose first rune
	// is Comment is skipped. The check is made on the raw line, before any
	// leading-space trimming. When non-zero it must be accepted by
	// validDelim.
	Comment rune

	// FieldsPerRecord controls the field-count check. A positive value
	// requires every record to have exactly that many fields. Zero makes the
	// reader adopt the field count of the next record it parses. A negative
	// value disables the check.
	FieldsPerRecord int

	// LazyQuotes relaxes quote handling: a quote may appear inside an
	// unquoted field, and a lone quote inside a quoted field is kept
	// literally instead of being reported as an error.
	LazyQuotes bool

	// TrimLeadingSpace drops leading white space (per unicode.IsSpace) at
	// the start of every field before the field is examined.
	TrimLeadingSpace bool

	// ReuseRecord lets Read hand the previously returned slice back to the
	// parser as the destination, so successive results may share a backing
	// array.
	ReuseRecord bool

	// TrailingComma is not read by the parsing code in this file.
	// NOTE(review): presumably retained for backward compatibility.
	TrailingComma bool

	// r is the buffered input source.
	r *bufio.Reader

	// numLine counts the lines fetched so far and is used as the line
	// number in reported errors.
	numLine int

	// rawBuffer assembles a line that does not fit into r's buffer.
	rawBuffer []byte

	// recordBuffer holds the unescaped bytes of all fields of the current
	// record, back to back.
	recordBuffer []byte

	// fieldIndexes holds, for each field of the current record, the offset
	// in recordBuffer at which that field ends.
	fieldIndexes []int

	// lastRecord is the slice returned by the previous Read call; it is
	// only maintained when ReuseRecord is set.
	lastRecord []string
}
168
169
170 func NewReader(r io.Reader) *Reader {
171 return &Reader{
172 Comma: ',',
173 r: bufio.NewReader(r),
174 }
175 }
176
177
178
179
180
181
182
183
184
185 func (r *Reader) Read() (record []string, err error) {
186 if r.ReuseRecord {
187 record, err = r.readRecord(r.lastRecord)
188 r.lastRecord = record
189 } else {
190 record, err = r.readRecord(nil)
191 }
192 return record, err
193 }
194
195
196
197
198
199
200 func (r *Reader) ReadAll() (records [][]string, err error) {
201 for {
202 record, err := r.readRecord(nil)
203 if err == io.EOF {
204 return records, nil
205 }
206 if err != nil {
207 return nil, err
208 }
209 records = append(records, record)
210 }
211 }
212
213
214
215
216
217 func (r *Reader) readLine() ([]byte, error) {
218 line, err := r.r.ReadSlice('\n')
219 if err == bufio.ErrBufferFull {
220 r.rawBuffer = append(r.rawBuffer[:0], line...)
221 for err == bufio.ErrBufferFull {
222 line, err = r.r.ReadSlice('\n')
223 r.rawBuffer = append(r.rawBuffer, line...)
224 }
225 line = r.rawBuffer
226 }
227 if len(line) > 0 && err == io.EOF {
228 err = nil
229
230 if line[len(line)-1] == '\r' {
231 line = line[:len(line)-1]
232 }
233 }
234 r.numLine++
235
236 if n := len(line); n >= 2 && line[n-2] == '\r' && line[n-1] == '\n' {
237 line[n-2] = '\n'
238 line = line[:n-1]
239 }
240 return line, err
241 }
242
243
// lengthNL returns 1 if b ends with a '\n' byte and 0 otherwise, i.e. the
// length of the trailing newline.
func lengthNL(b []byte) int {
	if n := len(b); n == 0 || b[n-1] != '\n' {
		return 0
	}
	return 1
}
250
251
// nextRune decodes and returns the first rune of b. For empty or invalid
// input it returns whatever utf8.DecodeRune reports, which is utf8.RuneError.
func nextRune(b []byte) rune {
	first, _ := utf8.DecodeRune(b)
	return first
}
256
257 func (r *Reader) readRecord(dst []string) ([]string, error) {
258 if r.Comma == r.Comment || !validDelim(r.Comma) || (r.Comment != 0 && !validDelim(r.Comment)) {
259 return nil, errInvalidDelim
260 }
261
262
263 var line, fullLine []byte
264 var errRead error
265 for errRead == nil {
266 line, errRead = r.readLine()
267 if r.Comment != 0 && nextRune(line) == r.Comment {
268 line = nil
269 continue
270 }
271 if errRead == nil && len(line) == lengthNL(line) {
272 line = nil
273 continue
274 }
275 fullLine = line
276 break
277 }
278 if errRead == io.EOF {
279 return nil, errRead
280 }
281
282
283 var err error
284 const quoteLen = len(`"`)
285 commaLen := utf8.RuneLen(r.Comma)
286 recLine := r.numLine
287 r.recordBuffer = r.recordBuffer[:0]
288 r.fieldIndexes = r.fieldIndexes[:0]
289 parseField:
290 for {
291 if r.TrimLeadingSpace {
292 line = bytes.TrimLeftFunc(line, unicode.IsSpace)
293 }
294 if len(line) == 0 || line[0] != '"' {
295
296 i := bytes.IndexRune(line, r.Comma)
297 field := line
298 if i >= 0 {
299 field = field[:i]
300 } else {
301 field = field[:len(field)-lengthNL(field)]
302 }
303
304 if !r.LazyQuotes {
305 if j := bytes.IndexByte(field, '"'); j >= 0 {
306 col := utf8.RuneCount(fullLine[:len(fullLine)-len(line[j:])])
307 err = &ParseError{StartLine: recLine, Line: r.numLine, Column: col, Err: ErrBareQuote}
308 break parseField
309 }
310 }
311 r.recordBuffer = append(r.recordBuffer, field...)
312 r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer))
313 if i >= 0 {
314 line = line[i+commaLen:]
315 continue parseField
316 }
317 break parseField
318 } else {
319
320 line = line[quoteLen:]
321 for {
322 i := bytes.IndexByte(line, '"')
323 if i >= 0 {
324
325 r.recordBuffer = append(r.recordBuffer, line[:i]...)
326 line = line[i+quoteLen:]
327 switch rn := nextRune(line); {
328 case rn == '"':
329
330 r.recordBuffer = append(r.recordBuffer, '"')
331 line = line[quoteLen:]
332 case rn == r.Comma:
333
334 line = line[commaLen:]
335 r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer))
336 continue parseField
337 case lengthNL(line) == len(line):
338
339 r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer))
340 break parseField
341 case r.LazyQuotes:
342
343 r.recordBuffer = append(r.recordBuffer, '"')
344 default:
345
346 col := utf8.RuneCount(fullLine[:len(fullLine)-len(line)-quoteLen])
347 err = &ParseError{StartLine: recLine, Line: r.numLine, Column: col, Err: ErrQuote}
348 break parseField
349 }
350 } else if len(line) > 0 {
351
352 r.recordBuffer = append(r.recordBuffer, line...)
353 if errRead != nil {
354 break parseField
355 }
356 line, errRead = r.readLine()
357 if errRead == io.EOF {
358 errRead = nil
359 }
360 fullLine = line
361 } else {
362
363 if !r.LazyQuotes && errRead == nil {
364 col := utf8.RuneCount(fullLine)
365 err = &ParseError{StartLine: recLine, Line: r.numLine, Column: col, Err: ErrQuote}
366 break parseField
367 }
368 r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer))
369 break parseField
370 }
371 }
372 }
373 }
374 if err == nil {
375 err = errRead
376 }
377
378
379
380 str := string(r.recordBuffer)
381 dst = dst[:0]
382 if cap(dst) < len(r.fieldIndexes) {
383 dst = make([]string, len(r.fieldIndexes))
384 }
385 dst = dst[:len(r.fieldIndexes)]
386 var preIdx int
387 for i, idx := range r.fieldIndexes {
388 dst[i] = str[preIdx:idx]
389 preIdx = idx
390 }
391
392
393 if r.FieldsPerRecord > 0 {
394 if len(dst) != r.FieldsPerRecord && err == nil {
395 err = &ParseError{StartLine: recLine, Line: recLine, Err: ErrFieldCount}
396 }
397 } else if r.FieldsPerRecord == 0 {
398 r.FieldsPerRecord = len(dst)
399 }
400 return dst, err
401 }
402
View as plain text