archive.mx raw

   1  // Copyright 2018 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  // Package txtar implements a trivial text-based file archive format.
   6  //
   7  // The goals for the format are:
   8  //
   9  //   - be trivial enough to create and edit by hand.
  10  //   - be able to store trees of text files describing go command test cases.
  11  //   - diff nicely in git history and code reviews.
  12  //
  13  // Non-goals include being a completely general archive format,
  14  // storing binary data, storing file modes, storing special files like
  15  // symbolic links, and so on.
  16  //
  17  // # Txtar format
  18  //
  19  // A txtar archive is zero or more comment lines and then a sequence of file entries.
  20  // Each file entry begins with a file marker line of the form "-- FILENAME --"
  21  // and is followed by zero or more file content lines making up the file data.
  22  // The comment or file content ends at the next file marker line.
  23  // The file marker line must begin with the three-byte sequence "-- "
  24  // and end with the three-byte sequence " --", but the enclosed
// file name can be surrounded by additional white space,
  26  // all of which is stripped.
  27  //
  28  // If the txtar file is missing a trailing newline on the final line,
  29  // parsers should consider a final newline to be present anyway.
  30  //
  31  // There are no possible syntax errors in a txtar archive.
  32  package txtar
  33  
  34  import (
  35  	"bytes"
  36  	"fmt"
  37  	"os"
  38  )
  39  
// An Archive is a collection of files.
// It is the in-memory form of a txtar archive: the leading comment
// followed by the file entries in the order they appear.
type Archive struct {
	Comment []byte // text before the first file marker line
	Files   []File // file entries, in archive order
}
  45  
// A File is a single file in an archive.
// Parse fills Name from the file marker line, with surrounding white
// space stripped, and Data from the lines up to the next marker line.
type File struct {
	Name []byte // name of file ("foo/bar.txt")
	Data []byte // text content of file
}
  51  
  52  // Format returns the serialized form of an Archive.
  53  // It is assumed that the Archive data structure is well-formed:
  54  // a.Comment and all a.File[i].Data contain no file marker lines,
  55  // and all a.File[i].Name is non-empty.
  56  func Format(a *Archive) []byte {
  57  	var buf bytes.Buffer
  58  	buf.Write(fixNL(a.Comment))
  59  	for _, f := range a.Files {
  60  		fmt.Fprintf(&buf, "-- %s --\n", f.Name)
  61  		buf.Write(fixNL(f.Data))
  62  	}
  63  	return buf.Bytes()
  64  }
  65  
  66  // ParseFile parses the named file as an archive.
  67  func ParseFile(file []byte) (*Archive, error) {
  68  	data, err := os.ReadFile(file)
  69  	if err != nil {
  70  		return nil, err
  71  	}
  72  	return Parse(data), nil
  73  }
  74  
  75  // Parse parses the serialized form of an Archive.
  76  // The returned Archive holds slices of data.
  77  func Parse(data []byte) *Archive {
  78  	a := &Archive{}
  79  	var name []byte
  80  	a.Comment, name, data = findFileMarker(data)
  81  	for name != "" {
  82  		f := File{name, nil}
  83  		f.Data, name, data = findFileMarker(data)
  84  		a.Files = append(a.Files, f)
  85  	}
  86  	return a
  87  }
  88  
// Byte sequences used to recognize file marker lines of the
// form "-- FILENAME --".
var (
	newlineMarker = []byte("\n-- ") // newline followed by the marker prefix; locates candidate marker lines
	marker        = []byte("-- ")   // required prefix of a file marker line
	markerEnd     = []byte(" --")   // required suffix of a file marker line
)
  94  
  95  // findFileMarker finds the next file marker in data,
  96  // extracts the file name, and returns the data before the marker,
  97  // the file name, and the data after the marker.
  98  // If there is no next marker, findFileMarker returns before = fixNL(data), name = "", after = nil.
  99  func findFileMarker(data []byte) (before []byte, name []byte, after []byte) {
 100  	var i int
 101  	for {
 102  		if name, after = isMarker(data[i:]); name != "" {
 103  			return data[:i], name, after
 104  		}
 105  		j := bytes.Index(data[i:], newlineMarker)
 106  		if j < 0 {
 107  			return fixNL(data), "", nil
 108  		}
 109  		i += j + 1 // positioned at start of new possible marker
 110  	}
 111  }
 112  
 113  // isMarker checks whether data begins with a file marker line.
 114  // If so, it returns the name from the line and the data after the line.
 115  // Otherwise it returns name == "" with an unspecified after.
 116  func isMarker(data []byte) (name []byte, after []byte) {
 117  	if !bytes.HasPrefix(data, marker) {
 118  		return "", nil
 119  	}
 120  	if i := bytes.IndexByte(data, '\n'); i >= 0 {
 121  		data, after = data[:i], data[i+1:]
 122  	}
 123  	if !(bytes.HasSuffix(data, markerEnd) && len(data) >= len(marker)+len(markerEnd)) {
 124  		return "", nil
 125  	}
 126  	return bytes.TrimSpace([]byte(data[len(marker) : len(data)-len(markerEnd)])), after
 127  }
 128  
 129  // If data is empty or ends in \n, fixNL returns data.
 130  // Otherwise fixNL returns a new slice consisting of data with a final \n added.
 131  func fixNL(data []byte) []byte {
 132  	if len(data) == 0 || data[len(data)-1] == '\n' {
 133  		return data
 134  	}
 135  	d := []byte{:len(data)+1}
 136  	copy(d, data)
 137  	d[len(data)] = '\n'
 138  	return d
 139  }
 140