1 // Copyright 2017 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 5 // Package cache implements a build artifact cache.
6 //
7 // This package is a slightly modified fork of Go's
8 // cmd/go/internal/cache package.
9 package cache
10 11 import (
12 "bytes"
13 "crypto/sha256"
14 "encoding/hex"
15 "errors"
16 "fmt"
17 "io"
18 "os"
19 "path/filepath"
20 "strconv"
21 "strings"
22 "time"
23 24 "honnef.co/go/tools/internal/renameio"
25 )
26 27 // An ActionID is a cache action key, the hash of a complete description of a
28 // repeatable computation (command line, environment variables,
29 // input file contents, executable contents).
30 type ActionID [HashSize]byte
31 32 // An OutputID is a cache output key, the hash of an output of a computation.
33 type OutputID [HashSize]byte
34 35 // A Cache is a package cache, backed by a file system directory tree.
36 type Cache struct {
37 dir string
38 now func() time.Time
39 salt []byte
40 }
41 42 // Open opens and returns the cache in the given directory.
43 //
44 // It is safe for multiple processes on a single machine to use the
45 // same cache directory in a local file system simultaneously.
46 // They will coordinate using operating system file locks and may
47 // duplicate effort but will not corrupt the cache.
48 //
49 // However, it is NOT safe for multiple processes on different machines
50 // to share a cache directory (for example, if the directory were stored
51 // in a network file system). File locking is notoriously unreliable in
52 // network file systems and may not suffice to protect the cache.
53 func Open(dir string) (*Cache, error) {
54 info, err := os.Stat(dir)
55 if err != nil {
56 return nil, err
57 }
58 if !info.IsDir() {
59 return nil, &os.PathError{Op: "open", Path: dir, Err: fmt.Errorf("not a directory")}
60 }
61 for i := 0; i < 256; i++ {
62 name := filepath.Join(dir, fmt.Sprintf("%02x", i))
63 if err := os.MkdirAll(name, 0777); err != nil {
64 return nil, err
65 }
66 }
67 c := &Cache{
68 dir: dir,
69 now: time.Now,
70 }
71 return c, nil
72 }
73 74 func (c *Cache) SetSalt(b []byte) {
75 c.salt = b
76 }
77 78 // fileName returns the name of the file corresponding to the given id.
79 func (c *Cache) fileName(id [HashSize]byte, key string) string {
80 return filepath.Join(c.dir, fmt.Sprintf("%02x", id[0]), fmt.Sprintf("%x", id)+"-"+key)
81 }
82 83 // An entryNotFoundError indicates that a cache entry was not found, with an
84 // optional underlying reason.
85 type entryNotFoundError struct {
86 Err error
87 }
88 89 func (e *entryNotFoundError) Error() string {
90 if e.Err == nil {
91 return "cache entry not found"
92 }
93 return fmt.Sprintf("cache entry not found: %v", e.Err)
94 }
95 96 func (e *entryNotFoundError) Unwrap() error {
97 return e.Err
98 }
99 100 const (
101 // action entry file is "v1 <hex id> <hex out> <decimal size space-padded to 20 bytes> <unixnano space-padded to 20 bytes>\n"
102 hexSize = HashSize * 2
103 entrySize = 2 + 1 + hexSize + 1 + hexSize + 1 + 20 + 1 + 20 + 1
104 )
105 106 // verify controls whether to run the cache in verify mode.
107 // In verify mode, the cache always returns errMissing from Get
108 // but then double-checks in Put that the data being written
109 // exactly matches any existing entry. This provides an easy
110 // way to detect program behavior that would have been different
111 // had the cache entry been returned from Get.
112 //
113 // verify is enabled by setting the environment variable
114 // GODEBUG=gocacheverify=1.
115 var verify = false
116 117 var errVerifyMode = errors.New("gocacheverify=1")
118 119 // DebugTest is set when GODEBUG=gocachetest=1 is in the environment.
120 var DebugTest = false
121 122 func init() { initEnv() }
123 124 func initEnv() {
125 verify = false
126 debugHash = false
127 debug := strings.Split(os.Getenv("GODEBUG"), ",")
128 for _, f := range debug {
129 if f == "gocacheverify=1" {
130 verify = true
131 }
132 if f == "gocachehash=1" {
133 debugHash = true
134 }
135 if f == "gocachetest=1" {
136 DebugTest = true
137 }
138 }
139 }
140 141 // Get looks up the action ID in the cache,
142 // returning the corresponding output ID and file size, if any.
143 // Note that finding an output ID does not guarantee that the
144 // saved file for that output ID is still available.
145 func (c *Cache) Get(id ActionID) (Entry, error) {
146 if verify {
147 return Entry{}, &entryNotFoundError{Err: errVerifyMode}
148 }
149 return c.get(id)
150 }
151 152 type Entry struct {
153 OutputID OutputID
154 Size int64
155 Time time.Time
156 }
157 158 // get is Get but does not respect verify mode, so that Put can use it.
159 func (c *Cache) get(id ActionID) (Entry, error) {
160 missing := func(reason error) (Entry, error) {
161 return Entry{}, &entryNotFoundError{Err: reason}
162 }
163 f, err := os.Open(c.fileName(id, "a"))
164 if err != nil {
165 return missing(err)
166 }
167 defer f.Close()
168 entry := make([]byte, entrySize+1) // +1 to detect whether f is too long
169 if n, err := io.ReadFull(f, entry); n > entrySize {
170 return missing(errors.New("too long"))
171 } else if err != io.ErrUnexpectedEOF {
172 if err == io.EOF {
173 return missing(errors.New("file is empty"))
174 }
175 return missing(err)
176 } else if n < entrySize {
177 return missing(errors.New("entry file incomplete"))
178 }
179 if entry[0] != 'v' || entry[1] != '1' || entry[2] != ' ' || entry[3+hexSize] != ' ' || entry[3+hexSize+1+hexSize] != ' ' || entry[3+hexSize+1+hexSize+1+20] != ' ' || entry[entrySize-1] != '\n' {
180 return missing(errors.New("invalid header"))
181 }
182 eid, entry := entry[3:3+hexSize], entry[3+hexSize:]
183 eout, entry := entry[1:1+hexSize], entry[1+hexSize:]
184 esize, entry := entry[1:1+20], entry[1+20:]
185 //lint:ignore SA4006 See https://github.com/dominikh/go-tools/issues/465
186 etime, entry := entry[1:1+20], entry[1+20:]
187 var buf [HashSize]byte
188 if _, err := hex.Decode(buf[:], eid); err != nil {
189 return missing(fmt.Errorf("decoding ID: %v", err))
190 } else if buf != id {
191 return missing(errors.New("mismatched ID"))
192 }
193 if _, err := hex.Decode(buf[:], eout); err != nil {
194 return missing(fmt.Errorf("decoding output ID: %v", err))
195 }
196 i := 0
197 for i < len(esize) && esize[i] == ' ' {
198 i++
199 }
200 size, err := strconv.ParseInt(string(esize[i:]), 10, 64)
201 if err != nil {
202 return missing(fmt.Errorf("parsing size: %v", err))
203 } else if size < 0 {
204 return missing(errors.New("negative size"))
205 }
206 i = 0
207 for i < len(etime) && etime[i] == ' ' {
208 i++
209 }
210 tm, err := strconv.ParseInt(string(etime[i:]), 10, 64)
211 if err != nil {
212 return missing(fmt.Errorf("parsing timestamp: %v", err))
213 } else if tm < 0 {
214 return missing(errors.New("negative timestamp"))
215 }
216 217 c.used(c.fileName(id, "a"))
218 219 return Entry{buf, size, time.Unix(0, tm)}, nil
220 }
221 222 // GetFile looks up the action ID in the cache and returns
223 // the name of the corresponding data file.
224 func (c *Cache) GetFile(id ActionID) (file string, entry Entry, err error) {
225 entry, err = c.Get(id)
226 if err != nil {
227 return "", Entry{}, err
228 }
229 file = c.OutputFile(entry.OutputID)
230 info, err := os.Stat(file)
231 if err != nil {
232 return "", Entry{}, &entryNotFoundError{Err: err}
233 }
234 if info.Size() != entry.Size {
235 return "", Entry{}, &entryNotFoundError{Err: errors.New("file incomplete")}
236 }
237 return file, entry, nil
238 }
239 240 // GetBytes looks up the action ID in the cache and returns
241 // the corresponding output bytes.
242 // GetBytes should only be used for data that can be expected to fit in memory.
243 func (c *Cache) GetBytes(id ActionID) ([]byte, Entry, error) {
244 entry, err := c.Get(id)
245 if err != nil {
246 return nil, entry, err
247 }
248 data, _ := os.ReadFile(c.OutputFile(entry.OutputID))
249 if sha256.Sum256(data) != entry.OutputID {
250 return nil, entry, &entryNotFoundError{Err: errors.New("bad checksum")}
251 }
252 return data, entry, nil
253 }
254 255 // OutputFile returns the name of the cache file storing output with the given OutputID.
256 func (c *Cache) OutputFile(out OutputID) string {
257 file := c.fileName(out, "d")
258 c.used(file)
259 return file
260 }
261 262 // Time constants for cache expiration.
263 //
264 // We set the mtime on a cache file on each use, but at most one per mtimeInterval (1 hour),
265 // to avoid causing many unnecessary inode updates. The mtimes therefore
266 // roughly reflect "time of last use" but may in fact be older by at most an hour.
267 //
268 // We scan the cache for entries to delete at most once per trimInterval (1 day).
269 //
270 // When we do scan the cache, we delete entries that have not been used for
271 // at least trimLimit (5 days). Statistics gathered from a month of usage by
272 // Go developers found that essentially all reuse of cached entries happened
273 // within 5 days of the previous reuse. See golang.org/issue/22990.
274 const (
275 mtimeInterval = 1 * time.Hour
276 trimInterval = 24 * time.Hour
277 trimLimit = 5 * 24 * time.Hour
278 )
279 280 // used makes a best-effort attempt to update mtime on file,
281 // so that mtime reflects cache access time.
282 //
283 // Because the reflection only needs to be approximate,
284 // and to reduce the amount of disk activity caused by using
285 // cache entries, used only updates the mtime if the current
286 // mtime is more than an hour old. This heuristic eliminates
287 // nearly all of the mtime updates that would otherwise happen,
288 // while still keeping the mtimes useful for cache trimming.
289 func (c *Cache) used(file string) {
290 info, err := os.Stat(file)
291 if err == nil && c.now().Sub(info.ModTime()) < mtimeInterval {
292 return
293 }
294 os.Chtimes(file, c.now(), c.now())
295 }
296 297 // Trim removes old cache entries that are likely not to be reused.
298 func (c *Cache) Trim() {
299 now := c.now()
300 301 // We maintain in dir/trim.txt the time of the last completed cache trim.
302 // If the cache has been trimmed recently enough, do nothing.
303 // This is the common case.
304 data, _ := renameio.ReadFile(filepath.Join(c.dir, "trim.txt"))
305 t, err := strconv.ParseInt(strings.TrimSpace(string(data)), 10, 64)
306 if err == nil && now.Sub(time.Unix(t, 0)) < trimInterval {
307 return
308 }
309 310 // Trim each of the 256 subdirectories.
311 // We subtract an additional mtimeInterval
312 // to account for the imprecision of our "last used" mtimes.
313 cutoff := now.Add(-trimLimit - mtimeInterval)
314 for i := 0; i < 256; i++ {
315 subdir := filepath.Join(c.dir, fmt.Sprintf("%02x", i))
316 c.trimSubdir(subdir, cutoff)
317 }
318 319 // Ignore errors from here: if we don't write the complete timestamp, the
320 // cache will appear older than it is, and we'll trim it again next time.
321 renameio.WriteFile(filepath.Join(c.dir, "trim.txt"), []byte(fmt.Sprintf("%d", now.Unix())), 0666)
322 }
323 324 // trimSubdir trims a single cache subdirectory.
325 func (c *Cache) trimSubdir(subdir string, cutoff time.Time) {
326 // Read all directory entries from subdir before removing
327 // any files, in case removing files invalidates the file offset
328 // in the directory scan. Also, ignore error from f.Readdirnames,
329 // because we don't care about reporting the error and we still
330 // want to process any entries found before the error.
331 f, err := os.Open(subdir)
332 if err != nil {
333 return
334 }
335 names, _ := f.Readdirnames(-1)
336 f.Close()
337 338 for _, name := range names {
339 // Remove only cache entries (xxxx-a and xxxx-d).
340 if !strings.HasSuffix(name, "-a") && !strings.HasSuffix(name, "-d") {
341 continue
342 }
343 entry := filepath.Join(subdir, name)
344 info, err := os.Stat(entry)
345 if err == nil && info.ModTime().Before(cutoff) {
346 os.Remove(entry)
347 }
348 }
349 }
350 351 // putIndexEntry adds an entry to the cache recording that executing the action
352 // with the given id produces an output with the given output id (hash) and size.
353 func (c *Cache) putIndexEntry(id ActionID, out OutputID, size int64, allowVerify bool) error {
354 // Note: We expect that for one reason or another it may happen
355 // that repeating an action produces a different output hash
356 // (for example, if the output contains a time stamp or temp dir name).
357 // While not ideal, this is also not a correctness problem, so we
358 // don't make a big deal about it. In particular, we leave the action
359 // cache entries writable specifically so that they can be overwritten.
360 //
361 // Setting GODEBUG=gocacheverify=1 does make a big deal:
362 // in verify mode we are double-checking that the cache entries
363 // are entirely reproducible. As just noted, this may be unrealistic
364 // in some cases but the check is also useful for shaking out real bugs.
365 entry := fmt.Sprintf("v1 %x %x %20d %20d\n", id, out, size, time.Now().UnixNano())
366 if verify && allowVerify {
367 old, err := c.get(id)
368 if err == nil && (old.OutputID != out || old.Size != size) {
369 // panic to show stack trace, so we can see what code is generating this cache entry.
370 msg := fmt.Sprintf("go: internal cache error: cache verify failed: id=%x changed:<<<\n%s\n>>>\nold: %x %d\nnew: %x %d", id, reverseHash(id), out, size, old.OutputID, old.Size)
371 panic(msg)
372 }
373 }
374 file := c.fileName(id, "a")
375 376 // Copy file to cache directory.
377 mode := os.O_WRONLY | os.O_CREATE
378 f, err := os.OpenFile(file, mode, 0666)
379 if err != nil {
380 return err
381 }
382 _, err = f.WriteString(entry)
383 if err == nil {
384 // Truncate the file only *after* writing it.
385 // (This should be a no-op, but truncate just in case of previous corruption.)
386 //
387 // This differs from ioutil.WriteFile, which truncates to 0 *before* writing
388 // via os.O_TRUNC. Truncating only after writing ensures that a second write
389 // of the same content to the same file is idempotent, and does not — even
390 // temporarily! — undo the effect of the first write.
391 err = f.Truncate(int64(len(entry)))
392 }
393 if closeErr := f.Close(); err == nil {
394 err = closeErr
395 }
396 if err != nil {
397 // TODO(bcmills): This Remove potentially races with another go command writing to file.
398 // Can we eliminate it?
399 os.Remove(file)
400 return err
401 }
402 os.Chtimes(file, c.now(), c.now()) // mainly for tests
403 404 return nil
405 }
406 407 // Put stores the given output in the cache as the output for the action ID.
408 // It may read file twice. The content of file must not change between the two passes.
409 func (c *Cache) Put(id ActionID, file io.ReadSeeker) (OutputID, int64, error) {
410 return c.put(id, file, true)
411 }
412 413 // PutNoVerify is like Put but disables the verify check
414 // when GODEBUG=goverifycache=1 is set.
415 // It is meant for data that is OK to cache but that we expect to vary slightly from run to run,
416 // like test output containing times and the like.
417 func (c *Cache) PutNoVerify(id ActionID, file io.ReadSeeker) (OutputID, int64, error) {
418 return c.put(id, file, false)
419 }
420 421 func (c *Cache) put(id ActionID, file io.ReadSeeker, allowVerify bool) (OutputID, int64, error) {
422 // Compute output ID.
423 h := sha256.New()
424 if _, err := file.Seek(0, 0); err != nil {
425 return OutputID{}, 0, err
426 }
427 size, err := io.Copy(h, file)
428 if err != nil {
429 return OutputID{}, 0, err
430 }
431 var out OutputID
432 h.Sum(out[:0])
433 434 // Copy to cached output file (if not already present).
435 if err := c.copyFile(file, out, size); err != nil {
436 return out, size, err
437 }
438 439 // Add to cache index.
440 return out, size, c.putIndexEntry(id, out, size, allowVerify)
441 }
442 443 // PutBytes stores the given bytes in the cache as the output for the action ID.
444 func (c *Cache) PutBytes(id ActionID, data []byte) error {
445 _, _, err := c.Put(id, bytes.NewReader(data))
446 return err
447 }
448 449 // copyFile copies file into the cache, expecting it to have the given
450 // output ID and size, if that file is not present already.
451 func (c *Cache) copyFile(file io.ReadSeeker, out OutputID, size int64) error {
452 name := c.fileName(out, "d")
453 info, err := os.Stat(name)
454 if err == nil && info.Size() == size {
455 // Check hash.
456 if f, err := os.Open(name); err == nil {
457 h := sha256.New()
458 io.Copy(h, f)
459 f.Close()
460 var out2 OutputID
461 h.Sum(out2[:0])
462 if out == out2 {
463 return nil
464 }
465 }
466 // Hash did not match. Fall through and rewrite file.
467 }
468 469 // Copy file to cache directory.
470 mode := os.O_RDWR | os.O_CREATE
471 if err == nil && info.Size() > size { // shouldn't happen but fix in case
472 mode |= os.O_TRUNC
473 }
474 f, err := os.OpenFile(name, mode, 0666)
475 if err != nil {
476 return err
477 }
478 defer f.Close()
479 if size == 0 {
480 // File now exists with correct size.
481 // Only one possible zero-length file, so contents are OK too.
482 // Early return here makes sure there's a "last byte" for code below.
483 return nil
484 }
485 486 // From here on, if any of the I/O writing the file fails,
487 // we make a best-effort attempt to truncate the file f
488 // before returning, to avoid leaving bad bytes in the file.
489 490 // Copy file to f, but also into h to double-check hash.
491 if _, err := file.Seek(0, 0); err != nil {
492 f.Truncate(0)
493 return err
494 }
495 h := sha256.New()
496 w := io.MultiWriter(f, h)
497 if _, err := io.CopyN(w, file, size-1); err != nil {
498 f.Truncate(0)
499 return err
500 }
501 // Check last byte before writing it; writing it will make the size match
502 // what other processes expect to find and might cause them to start
503 // using the file.
504 buf := make([]byte, 1)
505 if _, err := file.Read(buf); err != nil {
506 f.Truncate(0)
507 return err
508 }
509 h.Write(buf)
510 sum := h.Sum(nil)
511 if !bytes.Equal(sum, out[:]) {
512 f.Truncate(0)
513 return fmt.Errorf("file content changed underfoot")
514 }
515 516 // Commit cache file entry.
517 if _, err := f.Write(buf); err != nil {
518 f.Truncate(0)
519 return err
520 }
521 if err := f.Close(); err != nil {
522 // Data might not have been written,
523 // but file may look like it is the right size.
524 // To be extra careful, remove cached file.
525 os.Remove(name)
526 return err
527 }
528 os.Chtimes(name, c.now(), c.now()) // mainly for tests
529 530 return nil
531 }
532