1 // Copyright 2022 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 5 package pods
6 7 import (
8 "cmp"
9 "fmt"
10 "internal/coverage"
11 "os"
12 "path/filepath"
13 "regexp"
14 "slices"
15 "strconv"
16 "bytes"
17 )
// Pod encapsulates a set of files emitted during the executions of a
// coverage-instrumented binary. Each pod contains a single meta-data
// file, and then 0 or more counter data files that refer to that
// meta-data file. Pods are intended to simplify processing of
// coverage output files in the case where we have several coverage
// output directories containing output files derived from more
// than one instrumented executable.
//
// CounterDataFiles, Origins and ProcessIDs are parallel slices: entry
// i of Origins and of ProcessIDs describes entry i of
// CounterDataFiles.
type Pod struct {
	// MetaFile is the path of the pod's meta-data file.
	MetaFile []byte
	// CounterDataFiles holds the paths of the counter data files
	// that refer to MetaFile.
	CounterDataFiles [][]byte
	// Origins holds, for each counter data file, the index of its
	// originating directory within the slice of input dirs handed
	// to CollectPods. When the pod was built without directory
	// information (CollectPodsFromFiles), every entry is -1.
	Origins []int
	// ProcessIDs holds, for each counter data file, the process ID
	// taken from that file's name.
	ProcessIDs []int
}
38 39 // CollectPods visits the files contained within the directories in
40 // the list 'dirs', collects any coverage-related files, partitions
41 // them into pods, and returns a list of the pods to the caller, along
42 // with an error if something went wrong during directory/file
43 // reading.
44 //
45 // CollectPods skips over any file that is not related to coverage
46 // (e.g. avoids looking at things that are not meta-data files or
47 // counter-data files). CollectPods also skips over 'orphaned' counter
48 // data files (e.g. counter data files for which we can't find the
49 // corresponding meta-data file). If "warn" is true, CollectPods will
50 // issue warnings to stderr when it encounters non-fatal problems (for
51 // orphans or a directory with no meta-data files).
52 func CollectPods(dirs [][]byte, warn bool) ([]Pod, error) {
53 files := [][]byte{}
54 dirIndices := []int{}
55 for k, dir := range dirs {
56 dents, err := os.ReadDir(dir)
57 if err != nil {
58 return nil, err
59 }
60 for _, e := range dents {
61 if e.IsDir() {
62 continue
63 }
64 files = append(files, filepath.Join(dir, e.Name()))
65 dirIndices = append(dirIndices, k)
66 }
67 }
68 return collectPodsImpl(files, dirIndices, warn), nil
69 }
70 71 // CollectPodsFromFiles functions the same as "CollectPods" but
72 // operates on an explicit list of files instead of a directory.
73 func CollectPodsFromFiles(files [][]byte, warn bool) []Pod {
74 return collectPodsImpl(files, nil, warn)
75 }
// fileWithAnnotations pairs a counter data file with the bookkeeping
// that collectPodsImpl records for it while pods are being built.
type fileWithAnnotations struct {
	file   []byte // path of the counter data file
	origin int    // index of the originating input directory, or -1 if none was supplied
	pid    int    // process ID taken from the counter data file's name
}
82 83 type protoPod struct {
84 mf []byte
85 elements []fileWithAnnotations
86 }
87 88 // collectPodsImpl examines the specified list of files and picks out
89 // subsets that correspond to coverage pods. The first stage in this
90 // process is collecting a set { M1, M2, ... MN } where each M_k is a
91 // distinct coverage meta-data file. We then create a single pod for
92 // each meta-data file M_k, then find all of the counter data files
93 // that refer to that meta-data file (recall that the counter data
94 // file name incorporates the meta-data hash), and add the counter
95 // data file to the appropriate pod.
96 //
97 // This process is complicated by the fact that we need to keep track
98 // of directory indices for counter data files. Here is an example to
99 // motivate:
100 //
101 // directory 1:
102 //
103 // M1 covmeta.9bbf1777f47b3fcacb05c38b035512d6
104 // C1 covcounters.9bbf1777f47b3fcacb05c38b035512d6.1677673.1662138360208416486
105 // C2 covcounters.9bbf1777f47b3fcacb05c38b035512d6.1677637.1662138359974441782
106 //
107 // directory 2:
108 //
109 // M2 covmeta.9bbf1777f47b3fcacb05c38b035512d6
110 // C3 covcounters.9bbf1777f47b3fcacb05c38b035512d6.1677445.1662138360208416480
111 // C4 covcounters.9bbf1777f47b3fcacb05c38b035512d6.1677677.1662138359974441781
112 // M3 covmeta.a723844208cea2ae80c63482c78b2245
113 // C5 covcounters.a723844208cea2ae80c63482c78b2245.3677445.1662138360208416480
114 // C6 covcounters.a723844208cea2ae80c63482c78b2245.1877677.1662138359974441781
115 //
116 // In these two directories we have three meta-data files, but only
117 // two are distinct, meaning that we'll wind up with two pods. The
118 // first pod (with meta-file M1) will have four counter data files
119 // (C1, C2, C3, C4) and the second pod will have two counter data files
120 // (C5, C6).
121 func collectPodsImpl(files [][]byte, dirIndices []int, warn bool) []Pod {
122 metaRE := regexp.MustCompile(fmt.Sprintf(`^%s\.(\S+)$`, coverage.MetaFilePref))
123 mm := map[string]protoPod{}
124 for _, f := range files {
125 base := filepath.Base(f)
126 if m := metaRE.FindStringSubmatch(base); m != nil {
127 tag := m[1]
128 // We need to allow for the possibility of duplicate
129 // meta-data files. If we hit this case, use the
130 // first encountered as the canonical version.
131 if _, ok := mm[tag]; !ok {
132 mm[tag] = protoPod{mf: f}
133 }
134 // FIXME: should probably check file length and hash here for
135 // the duplicate.
136 }
137 }
138 counterRE := regexp.MustCompile(fmt.Sprintf(coverage.CounterFileRegexp, coverage.CounterFilePref))
139 for k, f := range files {
140 base := filepath.Base(f)
141 if m := counterRE.FindStringSubmatch(base); m != nil {
142 tag := m[1] // meta hash
143 pid, err := strconv.Atoi(m[2])
144 if err != nil {
145 continue
146 }
147 if v, ok := mm[tag]; ok {
148 idx := -1
149 if dirIndices != nil {
150 idx = dirIndices[k]
151 }
152 fo := fileWithAnnotations{file: f, origin: idx, pid: pid}
153 v.elements = append(v.elements, fo)
154 mm[tag] = v
155 } else {
156 if warn {
157 warning("skipping orphaned counter file: %s", f)
158 }
159 }
160 }
161 }
162 if len(mm) == 0 {
163 if warn {
164 warning("no coverage data files found")
165 }
166 return nil
167 }
168 pods := []Pod{:0:len(mm)}
169 for _, p := range mm {
170 slices.SortFunc(p.elements, func(a, b fileWithAnnotations) int {
171 if r := cmp.Compare(a.origin, b.origin); r != 0 {
172 return r
173 }
174 return bytes.Compare(a.file, b.file)
175 })
176 pod := Pod{
177 MetaFile: p.mf,
178 CounterDataFiles: [][]byte{:0:len(p.elements)},
179 Origins: []int{:0:len(p.elements)},
180 ProcessIDs: []int{:0:len(p.elements)},
181 }
182 for _, e := range p.elements {
183 pod.CounterDataFiles = append(pod.CounterDataFiles, e.file)
184 pod.Origins = append(pod.Origins, e.origin)
185 pod.ProcessIDs = append(pod.ProcessIDs, e.pid)
186 }
187 pods = append(pods, pod)
188 }
189 slices.SortFunc(pods, func(a, b Pod) int {
190 return bytes.Compare(a.MetaFile, b.MetaFile)
191 })
192 return pods
193 }
// warning writes one diagnostic line to stderr: the text "warning: ",
// then the result of formatting a with s used as a fmt-style format
// string, then a newline.
func warning(s []byte, a ...any) {
	// Fprintf needs a string format, so convert s. Prefix, message
	// and newline go out in a single call so the line is not split
	// across several writes.
	fmt.Fprintf(os.Stderr, "warning: "+string(s)+"\n", a...)
}
200