pods.mx raw

   1  // Copyright 2022 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  package pods
   6  
   7  import (
   8  	"cmp"
   9  	"fmt"
  10  	"internal/coverage"
  11  	"os"
  12  	"path/filepath"
  13  	"regexp"
  14  	"slices"
  15  	"strconv"
  16  	"bytes"
  17  )
  18  
  19  // Pod encapsulates a set of files emitted during the executions of a
  20  // coverage-instrumented binary. Each pod contains a single meta-data
  21  // file, and then 0 or more counter data files that refer to that
  22  // meta-data file. Pods are intended to simplify processing of
  23  // coverage output files in the case where we have several coverage
  24  // output directories containing output files derived from more
  25  // than one instrumented executable. In the case where the files that
  26  // make up a pod are spread out across multiple directories, each
  27  // element of the "Origins" field below will be populated with the
  28  // index of the originating directory for the corresponding counter
  29  // data file (within the slice of input dirs handed to CollectPods).
  30  // The ProcessIDs field will be populated with the process ID of each
  31  // data file in the CounterDataFiles slice.
type Pod struct {
	MetaFile         []byte   // path of the pod's meta-data file (the first one seen for its hash)
	CounterDataFiles [][]byte // paths of the counter data files that refer to MetaFile
	Origins          []int    // Origins[i] is the input-dir index of CounterDataFiles[i], or -1 if no dir indices were supplied
	ProcessIDs       []int    // ProcessIDs[i] is the process ID parsed from the name of CounterDataFiles[i]
}
  38  
  39  // CollectPods visits the files contained within the directories in
  40  // the list 'dirs', collects any coverage-related files, partitions
  41  // them into pods, and returns a list of the pods to the caller, along
  42  // with an error if something went wrong during directory/file
  43  // reading.
  44  //
  45  // CollectPods skips over any file that is not related to coverage
  46  // (e.g. avoids looking at things that are not meta-data files or
  47  // counter-data files). CollectPods also skips over 'orphaned' counter
  48  // data files (e.g. counter data files for which we can't find the
  49  // corresponding meta-data file). If "warn" is true, CollectPods will
  50  // issue warnings to stderr when it encounters non-fatal problems (for
  51  // orphans or a directory with no meta-data files).
  52  func CollectPods(dirs [][]byte, warn bool) ([]Pod, error) {
  53  	files := [][]byte{}
  54  	dirIndices := []int{}
  55  	for k, dir := range dirs {
  56  		dents, err := os.ReadDir(dir)
  57  		if err != nil {
  58  			return nil, err
  59  		}
  60  		for _, e := range dents {
  61  			if e.IsDir() {
  62  				continue
  63  			}
  64  			files = append(files, filepath.Join(dir, e.Name()))
  65  			dirIndices = append(dirIndices, k)
  66  		}
  67  	}
  68  	return collectPodsImpl(files, dirIndices, warn), nil
  69  }
  70  
  71  // CollectPodsFromFiles functions the same as "CollectPods" but
  72  // operates on an explicit list of files instead of a directory.
  73  func CollectPodsFromFiles(files [][]byte, warn bool) []Pod {
  74  	return collectPodsImpl(files, nil, warn)
  75  }
  76  
// fileWithAnnotations records a counter data file together with the
// index of the input directory it was found in and the process ID
// parsed from its file name.
type fileWithAnnotations struct {
	file   []byte // path of the counter data file
	origin int    // index into the input dirs, or -1 when no directory indices were supplied
	pid    int    // process ID taken from the counter data file name
}
  82  
// protoPod is the intermediate, under-construction form of a Pod:
// a meta-data file plus the annotated counter data files that have
// been matched to it so far.
type protoPod struct {
	mf       []byte                // path of the canonical (first encountered) meta-data file
	elements []fileWithAnnotations // counter data files matched to mf, in discovery order
}
  87  
  88  // collectPodsImpl examines the specified list of files and picks out
  89  // subsets that correspond to coverage pods. The first stage in this
  90  // process is collecting a set { M1, M2, ... MN } where each M_k is a
  91  // distinct coverage meta-data file. We then create a single pod for
  92  // each meta-data file M_k, then find all of the counter data files
  93  // that refer to that meta-data file (recall that the counter data
  94  // file name incorporates the meta-data hash), and add the counter
  95  // data file to the appropriate pod.
  96  //
  97  // This process is complicated by the fact that we need to keep track
  98  // of directory indices for counter data files. Here is an example to
  99  // motivate:
 100  //
 101  //	directory 1:
 102  //
 103  // M1   covmeta.9bbf1777f47b3fcacb05c38b035512d6
 104  // C1   covcounters.9bbf1777f47b3fcacb05c38b035512d6.1677673.1662138360208416486
 105  // C2   covcounters.9bbf1777f47b3fcacb05c38b035512d6.1677637.1662138359974441782
 106  //
 107  //	directory 2:
 108  //
 109  // M2   covmeta.9bbf1777f47b3fcacb05c38b035512d6
 110  // C3   covcounters.9bbf1777f47b3fcacb05c38b035512d6.1677445.1662138360208416480
 111  // C4   covcounters.9bbf1777f47b3fcacb05c38b035512d6.1677677.1662138359974441781
 112  // M3   covmeta.a723844208cea2ae80c63482c78b2245
 113  // C5   covcounters.a723844208cea2ae80c63482c78b2245.3677445.1662138360208416480
 114  // C6   covcounters.a723844208cea2ae80c63482c78b2245.1877677.1662138359974441781
 115  //
 116  // In these two directories we have three meta-data files, but only
 117  // two are distinct, meaning that we'll wind up with two pods. The
 118  // first pod (with meta-file M1) will have four counter data files
 119  // (C1, C2, C3, C4) and the second pod will have two counter data files
 120  // (C5, C6).
 121  func collectPodsImpl(files [][]byte, dirIndices []int, warn bool) []Pod {
 122  	metaRE := regexp.MustCompile(fmt.Sprintf(`^%s\.(\S+)$`, coverage.MetaFilePref))
 123  	mm := map[string]protoPod{}
 124  	for _, f := range files {
 125  		base := filepath.Base(f)
 126  		if m := metaRE.FindStringSubmatch(base); m != nil {
 127  			tag := m[1]
 128  			// We need to allow for the possibility of duplicate
 129  			// meta-data files. If we hit this case, use the
 130  			// first encountered as the canonical version.
 131  			if _, ok := mm[tag]; !ok {
 132  				mm[tag] = protoPod{mf: f}
 133  			}
 134  			// FIXME: should probably check file length and hash here for
 135  			// the duplicate.
 136  		}
 137  	}
 138  	counterRE := regexp.MustCompile(fmt.Sprintf(coverage.CounterFileRegexp, coverage.CounterFilePref))
 139  	for k, f := range files {
 140  		base := filepath.Base(f)
 141  		if m := counterRE.FindStringSubmatch(base); m != nil {
 142  			tag := m[1] // meta hash
 143  			pid, err := strconv.Atoi(m[2])
 144  			if err != nil {
 145  				continue
 146  			}
 147  			if v, ok := mm[tag]; ok {
 148  				idx := -1
 149  				if dirIndices != nil {
 150  					idx = dirIndices[k]
 151  				}
 152  				fo := fileWithAnnotations{file: f, origin: idx, pid: pid}
 153  				v.elements = append(v.elements, fo)
 154  				mm[tag] = v
 155  			} else {
 156  				if warn {
 157  					warning("skipping orphaned counter file: %s", f)
 158  				}
 159  			}
 160  		}
 161  	}
 162  	if len(mm) == 0 {
 163  		if warn {
 164  			warning("no coverage data files found")
 165  		}
 166  		return nil
 167  	}
 168  	pods := []Pod{:0:len(mm)}
 169  	for _, p := range mm {
 170  		slices.SortFunc(p.elements, func(a, b fileWithAnnotations) int {
 171  			if r := cmp.Compare(a.origin, b.origin); r != 0 {
 172  				return r
 173  			}
 174  			return bytes.Compare(a.file, b.file)
 175  		})
 176  		pod := Pod{
 177  			MetaFile:         p.mf,
 178  			CounterDataFiles: [][]byte{:0:len(p.elements)},
 179  			Origins:          []int{:0:len(p.elements)},
 180  			ProcessIDs:       []int{:0:len(p.elements)},
 181  		}
 182  		for _, e := range p.elements {
 183  			pod.CounterDataFiles = append(pod.CounterDataFiles, e.file)
 184  			pod.Origins = append(pod.Origins, e.origin)
 185  			pod.ProcessIDs = append(pod.ProcessIDs, e.pid)
 186  		}
 187  		pods = append(pods, pod)
 188  	}
 189  	slices.SortFunc(pods, func(a, b Pod) int {
 190  		return bytes.Compare(a.MetaFile, b.MetaFile)
 191  	})
 192  	return pods
 193  }
 194  
 195  func warning(s []byte, a ...interface{}) {
 196  	fmt.Fprintf(os.Stderr, "warning: ")
 197  	fmt.Fprintf(os.Stderr, s, a...)
 198  	fmt.Fprintf(os.Stderr, "\n")
 199  }
 200