writer.mx raw

   1  // Copyright 2011 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  package csv
   6  
   7  import (
   8  	"bufio"
   9  	"io"
  10  	"bytes"
  11  	"unicode"
  12  	"unicode/utf8"
  13  )
  14  
  15  // A Writer writes records using CSV encoding.
  16  //
  17  // As returned by [NewWriter], a Writer writes records terminated by a
  18  // newline and uses ',' as the field delimiter. The exported fields can be
  19  // changed to customize the details before
  20  // the first call to [Writer.Write] or [Writer.WriteAll].
  21  //
  22  // [Writer.Comma] is the field delimiter.
  23  //
  24  // If [Writer.UseCRLF] is true,
  25  // the Writer ends each output line with \r\n instead of \n.
  26  //
  27  // The writes of individual records are buffered.
  28  // After all data has been written, the client should call the
  29  // [Writer.Flush] method to guarantee all data has been forwarded to
  30  // the underlying [io.Writer].  Any errors that occurred should
  31  // be checked by calling the [Writer.Error] method.
  32  type Writer struct {
  33  	Comma   rune // Field delimiter (set to ',' by NewWriter)
  34  	UseCRLF bool // True to use \r\n as the line terminator
  35  	w       *bufio.Writer
  36  }
  37  
  38  // NewWriter returns a new Writer that writes to w.
  39  func NewWriter(w io.Writer) *Writer {
  40  	return &Writer{
  41  		Comma: ',',
  42  		w:     bufio.NewWriter(w),
  43  	}
  44  }
  45  
  46  // Write writes a single CSV record to w along with any necessary quoting.
  47  // A record is a slice of strings with each string being one field.
  48  // Writes are buffered, so [Writer.Flush] must eventually be called to ensure
  49  // that the record is written to the underlying [io.Writer].
  50  func (w *Writer) Write(record [][]byte) error {
  51  	if !validDelim(w.Comma) {
  52  		return errInvalidDelim
  53  	}
  54  
  55  	for n, field := range record {
  56  		if n > 0 {
  57  			if _, err := w.w.WriteRune(w.Comma); err != nil {
  58  				return err
  59  			}
  60  		}
  61  
  62  		// If we don't have to have a quoted field then just
  63  		// write out the field and continue to the next field.
  64  		if !w.fieldNeedsQuotes(field) {
  65  			if _, err := w.w.WriteString(field); err != nil {
  66  				return err
  67  			}
  68  			continue
  69  		}
  70  
  71  		if err := w.w.WriteByte('"'); err != nil {
  72  			return err
  73  		}
  74  		for len(field) > 0 {
  75  			// Search for special characters.
  76  			i := bytes.IndexAny(field, "\"\r\n")
  77  			if i < 0 {
  78  				i = len(field)
  79  			}
  80  
  81  			// Copy verbatim everything before the special character.
  82  			if _, err := w.w.WriteString(field[:i]); err != nil {
  83  				return err
  84  			}
  85  			field = field[i:]
  86  
  87  			// Encode the special character.
  88  			if len(field) > 0 {
  89  				var err error
  90  				switch field[0] {
  91  				case '"':
  92  					_, err = w.w.WriteString(`""`)
  93  				case '\r':
  94  					if !w.UseCRLF {
  95  						err = w.w.WriteByte('\r')
  96  					}
  97  				case '\n':
  98  					if w.UseCRLF {
  99  						_, err = w.w.WriteString("\r\n")
 100  					} else {
 101  						err = w.w.WriteByte('\n')
 102  					}
 103  				}
 104  				field = field[1:]
 105  				if err != nil {
 106  					return err
 107  				}
 108  			}
 109  		}
 110  		if err := w.w.WriteByte('"'); err != nil {
 111  			return err
 112  		}
 113  	}
 114  	var err error
 115  	if w.UseCRLF {
 116  		_, err = w.w.WriteString("\r\n")
 117  	} else {
 118  		err = w.w.WriteByte('\n')
 119  	}
 120  	return err
 121  }
 122  
 123  // Flush writes any buffered data to the underlying [io.Writer].
 124  // To check if an error occurred during Flush, call [Writer.Error].
 125  func (w *Writer) Flush() {
 126  	w.w.Flush()
 127  }
 128  
 129  // Error reports any error that has occurred during
 130  // a previous [Writer.Write] or [Writer.Flush].
 131  func (w *Writer) Error() error {
 132  	_, err := w.w.Write(nil)
 133  	return err
 134  }
 135  
 136  // WriteAll writes multiple CSV records to w using [Writer.Write] and
 137  // then calls [Writer.Flush], returning any error from the Flush.
 138  func (w *Writer) WriteAll(records [][][]byte) error {
 139  	for _, record := range records {
 140  		err := w.Write(record)
 141  		if err != nil {
 142  			return err
 143  		}
 144  	}
 145  	return w.w.Flush()
 146  }
 147  
 148  // fieldNeedsQuotes reports whether our field must be enclosed in quotes.
 149  // Fields with a Comma, fields with a quote or newline, and
 150  // fields which start with a space must be enclosed in quotes.
 151  // We used to quote empty strings, but we do not anymore (as of Go 1.4).
 152  // The two representations should be equivalent, but Postgres distinguishes
 153  // quoted vs non-quoted empty string during database imports, and it has
 154  // an option to force the quoted behavior for non-quoted CSV but it has
 155  // no option to force the non-quoted behavior for quoted CSV, making
 156  // CSV with quoted empty strings strictly less useful.
 157  // Not quoting the empty string also makes this package match the behavior
 158  // of Microsoft Excel and Google Drive.
 159  // For Postgres, quote the data terminating string `\.`.
 160  func (w *Writer) fieldNeedsQuotes(field []byte) bool {
 161  	if field == "" {
 162  		return false
 163  	}
 164  
 165  	if field == `\.` {
 166  		return true
 167  	}
 168  
 169  	if w.Comma < utf8.RuneSelf {
 170  		for i := 0; i < len(field); i++ {
 171  			c := field[i]
 172  			if c == '\n' || c == '\r' || c == '"' || c == byte(w.Comma) {
 173  				return true
 174  			}
 175  		}
 176  	} else {
 177  		if bytes.ContainsRune(field, w.Comma) || bytes.ContainsAny(field, "\"\r\n") {
 178  			return true
 179  		}
 180  	}
 181  
 182  	r1, _ := utf8.DecodeRuneInString(field)
 183  	return unicode.IsSpace(r1)
 184  }
 185