parser.go raw

   1  // Copyright 2015 Unknwon
   2  //
   3  // Licensed under the Apache License, Version 2.0 (the "License"): you may
   4  // not use this file except in compliance with the License. You may obtain
   5  // a copy of the License at
   6  //
   7  //     http://www.apache.org/licenses/LICENSE-2.0
   8  //
   9  // Unless required by applicable law or agreed to in writing, software
  10  // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
  11  // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
  12  // License for the specific language governing permissions and limitations
  13  // under the License.
  14  
  15  package ini
  16  
  17  import (
  18  	"bufio"
  19  	"bytes"
  20  	"fmt"
  21  	"io"
  22  	"regexp"
  23  	"strconv"
  24  	"strings"
  25  	"unicode"
  26  )
  27  
// minReaderBufferSize is the smallest buffer size, in bytes, that newParser
// gives to the parser's bufio.Reader. It is also the initial peek size used
// by File.parse when probing how much data the reader can buffer.
const minReaderBufferSize = 4096

// pythonMultiline matches a line that starts with at least one tab, form feed
// or space. readPythonMultilines treats a matching line as a continuation of
// the value read so far.
var pythonMultiline = regexp.MustCompile(`^([\t\f ]+)(.*)`)
  31  
// parserOptions carries the settings a parser consults while reading an INI
// stream. File.parse fills it from the File's options of the same names.
//
//   - IgnoreContinuation: when false, an unquoted value whose last
//     non-space character is a backslash is continued on the next line(s).
//   - IgnoreInlineComment: when false, a '#' or ';' inside an unquoted value
//     starts an inline comment that is split off from the value.
//   - AllowPythonMultilineValues: when true, indented lines that follow a
//     value may be appended to it (see readPythonMultilines).
//   - SpaceBeforeInlineComment: when true, an inline comment is only
//     recognized as " #" or " ;", i.e. with a space before the symbol.
//   - UnescapeValueDoubleQuotes: when true, a value starting with a double
//     quote is read as a quoted value and `\"` inside it becomes `"`.
//   - UnescapeValueCommentSymbols: when true, `\;` and `\#` in an unquoted
//     value that did not have surrounding quotes stripped become ";" and "#".
//   - PreserveSurroundedQuote: when true, a pair of single or double quotes
//     surrounding an unquoted value is kept rather than stripped.
//   - DebugFunc: optional receiver of debug messages; nil disables them.
//   - ReaderBufferSize: requested read buffer size in bytes; newParser raises
//     values below minReaderBufferSize to that minimum.
type parserOptions struct {
	IgnoreContinuation          bool
	IgnoreInlineComment         bool
	AllowPythonMultilineValues  bool
	SpaceBeforeInlineComment    bool
	UnescapeValueDoubleQuotes   bool
	UnescapeValueCommentSymbols bool
	PreserveSurroundedQuote     bool
	DebugFunc                   DebugFunc
	ReaderBufferSize            int
}
  43  
// parser holds the reading state for one INI stream.
//
//   - buf is the buffered reader all input is taken from.
//   - options are the settings given to newParser.
//   - isEOF is set by readUntil once the underlying reader reports io.EOF.
//   - count is the next number used for auto-increment ("-") key names; it
//     starts at 1 and File.parse resets it to 1 at each section header.
//   - comment accumulates comment text until File.parse attaches it to the
//     next section or key and resets the buffer.
type parser struct {
	buf     *bufio.Reader
	options parserOptions

	isEOF   bool
	count   int
	comment *bytes.Buffer
}
  52  
  53  func (p *parser) debug(format string, args ...interface{}) {
  54  	if p.options.DebugFunc != nil {
  55  		p.options.DebugFunc(fmt.Sprintf(format, args...))
  56  	}
  57  }
  58  
  59  func newParser(r io.Reader, opts parserOptions) *parser {
  60  	size := opts.ReaderBufferSize
  61  	if size < minReaderBufferSize {
  62  		size = minReaderBufferSize
  63  	}
  64  
  65  	return &parser{
  66  		buf:     bufio.NewReaderSize(r, size),
  67  		options: opts,
  68  		count:   1,
  69  		comment: &bytes.Buffer{},
  70  	}
  71  }
  72  
  73  // BOM handles header of UTF-8, UTF-16 LE and UTF-16 BE's BOM format.
  74  // http://en.wikipedia.org/wiki/Byte_order_mark#Representations_of_byte_order_marks_by_encoding
  75  func (p *parser) BOM() error {
  76  	mask, err := p.buf.Peek(2)
  77  	if err != nil && err != io.EOF {
  78  		return err
  79  	} else if len(mask) < 2 {
  80  		return nil
  81  	}
  82  
  83  	switch {
  84  	case mask[0] == 254 && mask[1] == 255:
  85  		fallthrough
  86  	case mask[0] == 255 && mask[1] == 254:
  87  		_, err = p.buf.Read(mask)
  88  		if err != nil {
  89  			return err
  90  		}
  91  	case mask[0] == 239 && mask[1] == 187:
  92  		mask, err := p.buf.Peek(3)
  93  		if err != nil && err != io.EOF {
  94  			return err
  95  		} else if len(mask) < 3 {
  96  			return nil
  97  		}
  98  		if mask[2] == 191 {
  99  			_, err = p.buf.Read(mask)
 100  			if err != nil {
 101  				return err
 102  			}
 103  		}
 104  	}
 105  	return nil
 106  }
 107  
 108  func (p *parser) readUntil(delim byte) ([]byte, error) {
 109  	data, err := p.buf.ReadBytes(delim)
 110  	if err != nil {
 111  		if err == io.EOF {
 112  			p.isEOF = true
 113  		} else {
 114  			return nil, err
 115  		}
 116  	}
 117  	return data, nil
 118  }
 119  
 120  func cleanComment(in []byte) ([]byte, bool) {
 121  	i := bytes.IndexAny(in, "#;")
 122  	if i == -1 {
 123  		return nil, false
 124  	}
 125  	return in[i:], true
 126  }
 127  
 128  func readKeyName(delimiters string, in []byte) (string, int, error) {
 129  	line := string(in)
 130  
 131  	// Check if key name surrounded by quotes.
 132  	var keyQuote string
 133  	if line[0] == '"' {
 134  		if len(line) > 6 && line[0:3] == `"""` {
 135  			keyQuote = `"""`
 136  		} else {
 137  			keyQuote = `"`
 138  		}
 139  	} else if line[0] == '`' {
 140  		keyQuote = "`"
 141  	}
 142  
 143  	// Get out key name
 144  	var endIdx int
 145  	if len(keyQuote) > 0 {
 146  		startIdx := len(keyQuote)
 147  		// FIXME: fail case -> """"""name"""=value
 148  		pos := strings.Index(line[startIdx:], keyQuote)
 149  		if pos == -1 {
 150  			return "", -1, fmt.Errorf("missing closing key quote: %s", line)
 151  		}
 152  		pos += startIdx
 153  
 154  		// Find key-value delimiter
 155  		i := strings.IndexAny(line[pos+startIdx:], delimiters)
 156  		if i < 0 {
 157  			return "", -1, ErrDelimiterNotFound{line}
 158  		}
 159  		endIdx = pos + i
 160  		return strings.TrimSpace(line[startIdx:pos]), endIdx + startIdx + 1, nil
 161  	}
 162  
 163  	endIdx = strings.IndexAny(line, delimiters)
 164  	if endIdx < 0 {
 165  		return "", -1, ErrDelimiterNotFound{line}
 166  	}
 167  	if endIdx == 0 {
 168  		return "", -1, ErrEmptyKeyName{line}
 169  	}
 170  
 171  	return strings.TrimSpace(line[0:endIdx]), endIdx + 1, nil
 172  }
 173  
 174  func (p *parser) readMultilines(line, val, valQuote string) (string, error) {
 175  	for {
 176  		data, err := p.readUntil('\n')
 177  		if err != nil {
 178  			return "", err
 179  		}
 180  		next := string(data)
 181  
 182  		pos := strings.LastIndex(next, valQuote)
 183  		if pos > -1 {
 184  			val += next[:pos]
 185  
 186  			comment, has := cleanComment([]byte(next[pos:]))
 187  			if has {
 188  				p.comment.Write(bytes.TrimSpace(comment))
 189  			}
 190  			break
 191  		}
 192  		val += next
 193  		if p.isEOF {
 194  			return "", fmt.Errorf("missing closing key quote from %q to %q", line, next)
 195  		}
 196  	}
 197  	return val, nil
 198  }
 199  
 200  func (p *parser) readContinuationLines(val string) (string, error) {
 201  	for {
 202  		data, err := p.readUntil('\n')
 203  		if err != nil {
 204  			return "", err
 205  		}
 206  		next := strings.TrimSpace(string(data))
 207  
 208  		if len(next) == 0 {
 209  			break
 210  		}
 211  		val += next
 212  		if val[len(val)-1] != '\\' {
 213  			break
 214  		}
 215  		val = val[:len(val)-1]
 216  	}
 217  	return val, nil
 218  }
 219  
 220  // hasSurroundedQuote check if and only if the first and last characters
 221  // are quotes \" or \'.
 222  // It returns false if any other parts also contain same kind of quotes.
 223  func hasSurroundedQuote(in string, quote byte) bool {
 224  	return len(in) >= 2 && in[0] == quote && in[len(in)-1] == quote &&
 225  		strings.IndexByte(in[1:], quote) == len(in)-2
 226  }
 227  
// readValue turns the raw text after a key's delimiter into the key's value.
//
// in is the remainder of the line (it may still end with the line break).
// bufferSize is passed through to readPythonMultilines as the number of
// bytes to peek ahead.
//
// Processing order:
//  1. An empty or whitespace-only remainder yields "" (or, with
//     AllowPythonMultilineValues and a trailing '\n', whatever
//     readPythonMultilines collects).
//  2. A value starting with `"""`, a backtick, or (with
//     UnescapeValueDoubleQuotes) `"` is a quoted value: everything up to the
//     last closing quote on the line is returned, or readMultilines is used
//     when the closing quote is not on this line.
//  3. Otherwise the value is trimmed, then continuation lines, inline
//     comments, surrounding quotes, escaped comment symbols and Python-style
//     multiline values are handled in that order.
//
// Inline comment text found in step 3 is appended to p.comment.
func (p *parser) readValue(in []byte, bufferSize int) (string, error) {

	line := strings.TrimLeftFunc(string(in), unicode.IsSpace)
	if len(line) == 0 {
		// Nothing but whitespace after the delimiter. A trailing line break
		// means further lines exist that may hold an indented value.
		if p.options.AllowPythonMultilineValues && len(in) > 0 && in[len(in)-1] == '\n' {
			return p.readPythonMultilines(line, bufferSize)
		}
		return "", nil
	}

	// Detect the opening quote, if any. A plain double quote only counts as
	// an opening quote when UnescapeValueDoubleQuotes is enabled.
	var valQuote string
	if len(line) > 3 && line[0:3] == `"""` {
		valQuote = `"""`
	} else if line[0] == '`' {
		valQuote = "`"
	} else if p.options.UnescapeValueDoubleQuotes && line[0] == '"' {
		valQuote = `"`
	}

	if len(valQuote) > 0 {
		startIdx := len(valQuote)
		// pos is relative to line[startIdx:]; the last occurrence is used, so
		// anything after the closing quote on this line is dropped.
		pos := strings.LastIndex(line[startIdx:], valQuote)
		// Check for multi-line value
		if pos == -1 {
			return p.readMultilines(line, line[startIdx:], valQuote)
		}

		if p.options.UnescapeValueDoubleQuotes && valQuote == `"` {
			return strings.Replace(line[startIdx:pos+startIdx], `\"`, `"`, -1), nil
		}
		return line[startIdx : pos+startIdx], nil
	}

	// Remember the untrimmed last byte: it tells whether the line ended with
	// a line break, which the Python multiline check below relies on.
	lastChar := line[len(line)-1]
	// Won't be able to reach here if value only contains whitespace
	line = strings.TrimSpace(line)
	trimmedLastChar := line[len(line)-1]

	// Check continuation lines when desired
	if !p.options.IgnoreContinuation && trimmedLastChar == '\\' {
		return p.readContinuationLines(line[:len(line)-1])
	}

	// Check if ignore inline comment
	if !p.options.IgnoreInlineComment {
		var i int
		if p.options.SpaceBeforeInlineComment {
			// NOTE(review): " #" is searched across the whole line first and
			// " ;" only when no " #" exists, so a " ;" that precedes a " #"
			// is not taken as the comment start. Confirm this is intended.
			i = strings.Index(line, " #")
			if i == -1 {
				i = strings.Index(line, " ;")
			}

		} else {
			i = strings.IndexAny(line, "#;")
		}

		if i > -1 {
			// Keep the comment (from the match position on) for the key and
			// cut it off the value.
			p.comment.WriteString(line[i:])
			line = strings.TrimSpace(line[:i])
		}

	}

	// Trim single and double quotes
	// NOTE(review): the three branches below are mutually exclusive, so the
	// Python multiline branch is only reached when neither quote stripping
	// nor UnescapeValueCommentSymbols applies. valQuote is always empty here
	// because the quoted case returned above.
	if (hasSurroundedQuote(line, '\'') ||
		hasSurroundedQuote(line, '"')) && !p.options.PreserveSurroundedQuote {
		line = line[1 : len(line)-1]
	} else if len(valQuote) == 0 && p.options.UnescapeValueCommentSymbols {
		line = strings.ReplaceAll(line, `\;`, ";")
		line = strings.ReplaceAll(line, `\#`, "#")
	} else if p.options.AllowPythonMultilineValues && lastChar == '\n' {
		return p.readPythonMultilines(line, bufferSize)
	}

	return line, nil
}
 304  
 305  func (p *parser) readPythonMultilines(line string, bufferSize int) (string, error) {
 306  	parserBufferPeekResult, _ := p.buf.Peek(bufferSize)
 307  	peekBuffer := bytes.NewBuffer(parserBufferPeekResult)
 308  
 309  	for {
 310  		peekData, peekErr := peekBuffer.ReadBytes('\n')
 311  		if peekErr != nil && peekErr != io.EOF {
 312  			p.debug("readPythonMultilines: failed to peek with error: %v", peekErr)
 313  			return "", peekErr
 314  		}
 315  
 316  		p.debug("readPythonMultilines: parsing %q", string(peekData))
 317  
 318  		peekMatches := pythonMultiline.FindStringSubmatch(string(peekData))
 319  		p.debug("readPythonMultilines: matched %d parts", len(peekMatches))
 320  		for n, v := range peekMatches {
 321  			p.debug("   %d: %q", n, v)
 322  		}
 323  
 324  		// Return if not a Python multiline value.
 325  		if len(peekMatches) != 3 {
 326  			p.debug("readPythonMultilines: end of value, got: %q", line)
 327  			return line, nil
 328  		}
 329  
 330  		// Advance the parser reader (buffer) in-sync with the peek buffer.
 331  		_, err := p.buf.Discard(len(peekData))
 332  		if err != nil {
 333  			p.debug("readPythonMultilines: failed to skip to the end, returning error")
 334  			return "", err
 335  		}
 336  
 337  		line += "\n" + peekMatches[0]
 338  	}
 339  }
 340  
// parse reads INI data from reader and populates f with the sections and
// keys it finds.
//
// Input is processed line by line. A leading byte order mark is skipped.
// Keys that appear before any section header go into the default section.
// Comment lines are collected and attached to the next section or key.
// Section bodies listed in f.options.UnparseableSections are stored verbatim
// instead of being parsed into keys.
//
// It returns the first error from reading, from creating a section or key,
// or from a line that cannot be interpreted and that the options do not
// allow to be treated as a boolean key or skipped.
func (f *File) parse(reader io.Reader) (err error) {
	p := newParser(reader, parserOptions{
		IgnoreContinuation:          f.options.IgnoreContinuation,
		IgnoreInlineComment:         f.options.IgnoreInlineComment,
		AllowPythonMultilineValues:  f.options.AllowPythonMultilineValues,
		SpaceBeforeInlineComment:    f.options.SpaceBeforeInlineComment,
		UnescapeValueDoubleQuotes:   f.options.UnescapeValueDoubleQuotes,
		UnescapeValueCommentSymbols: f.options.UnescapeValueCommentSymbols,
		PreserveSurroundedQuote:     f.options.PreserveSurroundedQuote,
		DebugFunc:                   f.options.DebugFunc,
		ReaderBufferSize:            f.options.ReaderBufferSize,
	})
	if err = p.BOM(); err != nil {
		return fmt.Errorf("BOM: %v", err)
	}

	// Ignore error because default section name is never empty string.
	name := DefaultSection
	if f.options.Insensitive || f.options.InsensitiveSections {
		name = strings.ToLower(DefaultSection)
	}
	section, _ := f.NewSection(name)

	// This "last" is not strictly equivalent to "previous one" if current key is not the first nested key
	var isLastValueEmpty bool
	var lastRegularKey *Key

	var line []byte
	var inUnparseableSection bool

	// NOTE: Iterate and increase `currentPeekSize` until
	// the size of the parser buffer is found.
	// TODO(unknwon): When Golang 1.10 is the lowest version supported, replace with `parserBufferSize := p.buf.Size()`.
	parserBufferSize := 0
	// NOTE: Peek 4kb at a time.
	currentPeekSize := minReaderBufferSize

	if f.options.AllowPythonMultilineValues {
		for {
			// The peek error is ignored; only the number of bytes returned
			// matters. The loop stops once asking for more yields no more.
			peekBytes, _ := p.buf.Peek(currentPeekSize)
			peekBytesLength := len(peekBytes)

			if parserBufferSize >= peekBytesLength {
				break
			}

			currentPeekSize *= 2
			parserBufferSize = peekBytesLength
		}
	}

	for !p.isEOF {
		line, err = p.readUntil('\n')
		if err != nil {
			return err
		}

		// An indented line directly after a key with an empty value is a
		// nested value of that key. The check uses the raw line, before
		// leading whitespace is trimmed below.
		if f.options.AllowNestedValues &&
			isLastValueEmpty && len(line) > 0 {
			if line[0] == ' ' || line[0] == '\t' {
				err = lastRegularKey.addNestedValue(string(bytes.TrimSpace(line)))
				if err != nil {
					return err
				}
				continue
			}
		}

		// Skip blank lines.
		line = bytes.TrimLeftFunc(line, unicode.IsSpace)
		if len(line) == 0 {
			continue
		}

		// Comments
		if line[0] == '#' || line[0] == ';' {
			// Note: we do not care ending line break,
			// it is needed for adding second line,
			// so just clean it once at the end when set to value.
			p.comment.Write(line)
			continue
		}

		// Section
		if line[0] == '[' {
			// Read to the next ']' (TODO: support quoted strings)
			// The last ']' on the line is used as the end of the name.
			closeIdx := bytes.LastIndexByte(line, ']')
			if closeIdx == -1 {
				return fmt.Errorf("unclosed section: %s", line)
			}

			name := string(line[1:closeIdx])
			section, err = f.NewSection(name)
			if err != nil {
				return err
			}

			// A comment that follows the header on the same line is added to
			// the comments collected above it.
			comment, has := cleanComment(line[closeIdx+1:])
			if has {
				p.comment.Write(comment)
			}

			section.Comment = strings.TrimSpace(p.comment.String())

			// Reset auto-counter and comments
			p.comment.Reset()
			p.count = 1
			// Nested values can't span sections
			isLastValueEmpty = false

			// Decide whether this section's body must be kept as raw text.
			// The `continue` below only advances this inner loop.
			inUnparseableSection = false
			for i := range f.options.UnparseableSections {
				if f.options.UnparseableSections[i] == name ||
					((f.options.Insensitive || f.options.InsensitiveSections) && strings.EqualFold(f.options.UnparseableSections[i], name)) {
					inUnparseableSection = true
					continue
				}
			}
			continue
		}

		// Inside an unparseable section every non-blank, non-comment line is
		// appended to the raw body (after the left trim done above).
		if inUnparseableSection {
			section.isRawSection = true
			section.rawBody += string(line)
			continue
		}

		kname, offset, err := readKeyName(f.options.KeyValueDelimiters, line)
		if err != nil {
			switch {
			// Treat as boolean key when desired, and whole line is key name.
			case IsErrDelimiterNotFound(err):
				switch {
				case f.options.AllowBooleanKeys:
					// The whole line goes through readValue so the same
					// value processing is applied to the key name.
					kname, err := p.readValue(line, parserBufferSize)
					if err != nil {
						return err
					}
					key, err := section.NewBooleanKey(kname)
					if err != nil {
						return err
					}
					key.Comment = strings.TrimSpace(p.comment.String())
					p.comment.Reset()
					continue

				case f.options.SkipUnrecognizableLines:
					continue
				}
			case IsErrEmptyKeyName(err) && f.options.SkipUnrecognizableLines:
				continue
			}
			// Not recoverable under the current options.
			return err
		}

		// Auto increment.
		// A key written as "-" is stored as "#<n>", with n counted per
		// section starting from 1.
		isAutoIncr := false
		if kname == "-" {
			isAutoIncr = true
			kname = "#" + strconv.Itoa(p.count)
			p.count++
		}

		value, err := p.readValue(line[offset:], parserBufferSize)
		if err != nil {
			return err
		}
		// Remembered so that indented lines that follow can be attached as
		// nested values (see the top of the loop).
		isLastValueEmpty = len(value) == 0

		key, err := section.NewKey(kname, value)
		if err != nil {
			return err
		}
		key.isAutoIncrement = isAutoIncr
		key.Comment = strings.TrimSpace(p.comment.String())
		p.comment.Reset()
		lastRegularKey = key
	}
	return nil
}
 521