// Copyright 2017 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // Package cache implements a build artifact cache. // // This package is a slightly modified fork of Go's // cmd/go/internal/cache package. package cache import ( "bytes" "crypto/sha256" "encoding/hex" "fmt" "io" "os" "path/filepath" "strconv" "strings" "time" "github.com/pkg/errors" "github.com/golangci/golangci-lint/internal/renameio" "github.com/golangci/golangci-lint/internal/robustio" ) // An ActionID is a cache action key, the hash of a complete description of a // repeatable computation (command line, environment variables, // input file contents, executable contents). type ActionID [HashSize]byte // An OutputID is a cache output key, the hash of an output of a computation. type OutputID [HashSize]byte // A Cache is a package cache, backed by a file system directory tree. type Cache struct { dir string now func() time.Time } // Open opens and returns the cache in the given directory. // // It is safe for multiple processes on a single machine to use the // same cache directory in a local file system simultaneously. // They will coordinate using operating system file locks and may // duplicate effort but will not corrupt the cache. // // However, it is NOT safe for multiple processes on different machines // to share a cache directory (for example, if the directory were stored // in a network file system). File locking is notoriously unreliable in // network file systems and may not suffice to protect the cache. // func Open(dir string) (*Cache, error) { info, err := os.Stat(dir) if err != nil { return nil, err } if !info.IsDir() { return nil, &os.PathError{Op: "open", Path: dir, Err: fmt.Errorf("not a directory")} } for i := 0; i < 256; i++ { name := filepath.Join(dir, fmt.Sprintf("%02x", i)) if err := os.MkdirAll(name, 0744); err != nil { return nil, err } } c := &Cache{ dir: dir, now: time.Now, } return c, nil } // fileName returns the name of the file corresponding to the given id. func (c *Cache) fileName(id [HashSize]byte, key string) string { return filepath.Join(c.dir, fmt.Sprintf("%02x", id[0]), fmt.Sprintf("%x", id)+"-"+key) } var errMissing = errors.New("cache entry not found") func IsErrMissing(err error) bool { return errors.Cause(err) == errMissing } const ( // action entry file is "v1 \n" hexSize = HashSize * 2 entrySize = 2 + 1 + hexSize + 1 + hexSize + 1 + 20 + 1 + 20 + 1 ) // verify controls whether to run the cache in verify mode. // In verify mode, the cache always returns errMissing from Get // but then double-checks in Put that the data being written // exactly matches any existing entry. This provides an easy // way to detect program behavior that would have been different // had the cache entry been returned from Get. // // verify is enabled by setting the environment variable // GODEBUG=gocacheverify=1. var verify = false // DebugTest is set when GODEBUG=gocachetest=1 is in the environment. var DebugTest = false func init() { initEnv() } func initEnv() { verify = false debugHash = false debug := strings.Split(os.Getenv("GODEBUG"), ",") for _, f := range debug { if f == "gocacheverify=1" { verify = true } if f == "gocachehash=1" { debugHash = true } if f == "gocachetest=1" { DebugTest = true } } } // Get looks up the action ID in the cache, // returning the corresponding output ID and file size, if any. // Note that finding an output ID does not guarantee that the // saved file for that output ID is still available. func (c *Cache) Get(id ActionID) (Entry, error) { if verify { return Entry{}, errMissing } return c.get(id) } type Entry struct { OutputID OutputID Size int64 Time time.Time } // get is Get but does not respect verify mode, so that Put can use it. func (c *Cache) get(id ActionID) (Entry, error) { missing := func() (Entry, error) { return Entry{}, errMissing } failed := func(err error) (Entry, error) { return Entry{}, err } fileName := c.fileName(id, "a") f, err := os.Open(fileName) if err != nil { if os.IsNotExist(err) { return missing() } return failed(err) } defer f.Close() entry := make([]byte, entrySize+1) // +1 to detect whether f is too long if n, readErr := io.ReadFull(f, entry); n != entrySize || readErr != io.ErrUnexpectedEOF { return failed(fmt.Errorf("read %d/%d bytes from %s with error %s", n, entrySize, fileName, readErr)) } if entry[0] != 'v' || entry[1] != '1' || entry[2] != ' ' || entry[3+hexSize] != ' ' || entry[3+hexSize+1+hexSize] != ' ' || entry[3+hexSize+1+hexSize+1+20] != ' ' || entry[entrySize-1] != '\n' { return failed(fmt.Errorf("bad data in %s", fileName)) } eid, entry := entry[3:3+hexSize], entry[3+hexSize:] eout, entry := entry[1:1+hexSize], entry[1+hexSize:] esize, entry := entry[1:1+20], entry[1+20:] etime := entry[1 : 1+20] var buf [HashSize]byte if _, err = hex.Decode(buf[:], eid); err != nil || buf != id { return failed(errors.Wrapf(err, "failed to hex decode eid data in %s", fileName)) } if _, err = hex.Decode(buf[:], eout); err != nil { return failed(errors.Wrapf(err, "failed to hex decode eout data in %s", fileName)) } i := 0 for i < len(esize) && esize[i] == ' ' { i++ } size, err := strconv.ParseInt(string(esize[i:]), 10, 64) if err != nil || size < 0 { return failed(fmt.Errorf("failed to parse esize int from %s with error %s", fileName, err)) } i = 0 for i < len(etime) && etime[i] == ' ' { i++ } tm, err := strconv.ParseInt(string(etime[i:]), 10, 64) if err != nil || tm < 0 { return failed(fmt.Errorf("failed to parse etime int from %s with error %s", fileName, err)) } if err = c.used(fileName); err != nil { return failed(errors.Wrapf(err, "failed to mark %s as used", fileName)) } return Entry{buf, size, time.Unix(0, tm)}, nil } // GetBytes looks up the action ID in the cache and returns // the corresponding output bytes. // GetBytes should only be used for data that can be expected to fit in memory. func (c *Cache) GetBytes(id ActionID) ([]byte, Entry, error) { entry, err := c.Get(id) if err != nil { return nil, entry, err } outputFile, err := c.OutputFile(entry.OutputID) if err != nil { return nil, entry, err } data, err := robustio.ReadFile(outputFile) if err != nil { return nil, entry, err } if sha256.Sum256(data) != entry.OutputID { return nil, entry, errMissing } return data, entry, nil } // OutputFile returns the name of the cache file storing output with the given OutputID. func (c *Cache) OutputFile(out OutputID) (string, error) { file := c.fileName(out, "d") if err := c.used(file); err != nil { return "", err } return file, nil } // Time constants for cache expiration. // // We set the mtime on a cache file on each use, but at most one per mtimeInterval (1 hour), // to avoid causing many unnecessary inode updates. The mtimes therefore // roughly reflect "time of last use" but may in fact be older by at most an hour. // // We scan the cache for entries to delete at most once per trimInterval (1 day). // // When we do scan the cache, we delete entries that have not been used for // at least trimLimit (5 days). Statistics gathered from a month of usage by // Go developers found that essentially all reuse of cached entries happened // within 5 days of the previous reuse. See golang.org/issue/22990. const ( mtimeInterval = 1 * time.Hour trimInterval = 24 * time.Hour trimLimit = 5 * 24 * time.Hour ) // used makes a best-effort attempt to update mtime on file, // so that mtime reflects cache access time. // // Because the reflection only needs to be approximate, // and to reduce the amount of disk activity caused by using // cache entries, used only updates the mtime if the current // mtime is more than an hour old. This heuristic eliminates // nearly all of the mtime updates that would otherwise happen, // while still keeping the mtimes useful for cache trimming. func (c *Cache) used(file string) error { info, err := os.Stat(file) if err != nil { if os.IsNotExist(err) { return errMissing } return errors.Wrapf(err, "failed to stat file %s", file) } if c.now().Sub(info.ModTime()) < mtimeInterval { return nil } if err := os.Chtimes(file, c.now(), c.now()); err != nil { return errors.Wrapf(err, "failed to change time of file %s", file) } return nil } // Trim removes old cache entries that are likely not to be reused. func (c *Cache) Trim() { now := c.now() // We maintain in dir/trim.txt the time of the last completed cache trim. // If the cache has been trimmed recently enough, do nothing. // This is the common case. data, _ := renameio.ReadFile(filepath.Join(c.dir, "trim.txt")) t, err := strconv.ParseInt(strings.TrimSpace(string(data)), 10, 64) if err == nil && now.Sub(time.Unix(t, 0)) < trimInterval { return } // Trim each of the 256 subdirectories. // We subtract an additional mtimeInterval // to account for the imprecision of our "last used" mtimes. cutoff := now.Add(-trimLimit - mtimeInterval) for i := 0; i < 256; i++ { subdir := filepath.Join(c.dir, fmt.Sprintf("%02x", i)) c.trimSubdir(subdir, cutoff) } // Ignore errors from here: if we don't write the complete timestamp, the // cache will appear older than it is, and we'll trim it again next time. _ = renameio.WriteFile(filepath.Join(c.dir, "trim.txt"), []byte(fmt.Sprintf("%d", now.Unix())), 0666) } // trimSubdir trims a single cache subdirectory. func (c *Cache) trimSubdir(subdir string, cutoff time.Time) { // Read all directory entries from subdir before removing // any files, in case removing files invalidates the file offset // in the directory scan. Also, ignore error from f.Readdirnames, // because we don't care about reporting the error and we still // want to process any entries found before the error. f, err := os.Open(subdir) if err != nil { return } names, _ := f.Readdirnames(-1) f.Close() for _, name := range names { // Remove only cache entries (xxxx-a and xxxx-d). if !strings.HasSuffix(name, "-a") && !strings.HasSuffix(name, "-d") { continue } entry := filepath.Join(subdir, name) info, err := os.Stat(entry) if err == nil && info.ModTime().Before(cutoff) { os.Remove(entry) } } } // putIndexEntry adds an entry to the cache recording that executing the action // with the given id produces an output with the given output id (hash) and size. func (c *Cache) putIndexEntry(id ActionID, out OutputID, size int64, allowVerify bool) error { // Note: We expect that for one reason or another it may happen // that repeating an action produces a different output hash // (for example, if the output contains a time stamp or temp dir name). // While not ideal, this is also not a correctness problem, so we // don't make a big deal about it. In particular, we leave the action // cache entries writable specifically so that they can be overwritten. // // Setting GODEBUG=gocacheverify=1 does make a big deal: // in verify mode we are double-checking that the cache entries // are entirely reproducible. As just noted, this may be unrealistic // in some cases but the check is also useful for shaking out real bugs. entry := fmt.Sprintf("v1 %x %x %20d %20d\n", id, out, size, time.Now().UnixNano()) if verify && allowVerify { old, err := c.get(id) if err == nil && (old.OutputID != out || old.Size != size) { // panic to show stack trace, so we can see what code is generating this cache entry. msg := fmt.Sprintf("go: internal cache error: cache verify failed: id=%x changed:<<<\n%s\n>>>\nold: %x %d\nnew: %x %d", id, reverseHash(id), out, size, old.OutputID, old.Size) panic(msg) } } file := c.fileName(id, "a") // Copy file to cache directory. mode := os.O_WRONLY | os.O_CREATE f, err := os.OpenFile(file, mode, 0666) if err != nil { return err } _, err = f.WriteString(entry) if err == nil { // Truncate the file only *after* writing it. // (This should be a no-op, but truncate just in case of previous corruption.) // // This differs from ioutil.WriteFile, which truncates to 0 *before* writing // via os.O_TRUNC. Truncating only after writing ensures that a second write // of the same content to the same file is idempotent, and does not — even // temporarily! — undo the effect of the first write. err = f.Truncate(int64(len(entry))) } if closeErr := f.Close(); err == nil { err = closeErr } if err != nil { // TODO(bcmills): This Remove potentially races with another go command writing to file. // Can we eliminate it? os.Remove(file) return err } if err = os.Chtimes(file, c.now(), c.now()); err != nil { // mainly for tests return errors.Wrapf(err, "failed to change time of file %s", file) } return nil } // Put stores the given output in the cache as the output for the action ID. // It may read file twice. The content of file must not change between the two passes. func (c *Cache) Put(id ActionID, file io.ReadSeeker) (OutputID, int64, error) { return c.put(id, file, true) } // PutNoVerify is like Put but disables the verify check // when GODEBUG=goverifycache=1 is set. // It is meant for data that is OK to cache but that we expect to vary slightly from run to run, // like test output containing times and the like. func (c *Cache) PutNoVerify(id ActionID, file io.ReadSeeker) (OutputID, int64, error) { return c.put(id, file, false) } func (c *Cache) put(id ActionID, file io.ReadSeeker, allowVerify bool) (OutputID, int64, error) { // Compute output ID. h := sha256.New() if _, err := file.Seek(0, 0); err != nil { return OutputID{}, 0, err } size, err := io.Copy(h, file) if err != nil { return OutputID{}, 0, err } var out OutputID h.Sum(out[:0]) // Copy to cached output file (if not already present). if err := c.copyFile(file, out, size); err != nil { return out, size, err } // Add to cache index. return out, size, c.putIndexEntry(id, out, size, allowVerify) } // PutBytes stores the given bytes in the cache as the output for the action ID. func (c *Cache) PutBytes(id ActionID, data []byte) error { _, _, err := c.Put(id, bytes.NewReader(data)) return err } // copyFile copies file into the cache, expecting it to have the given // output ID and size, if that file is not present already. func (c *Cache) copyFile(file io.ReadSeeker, out OutputID, size int64) error { name := c.fileName(out, "d") info, err := os.Stat(name) if err == nil && info.Size() == size { // Check hash. if f, openErr := os.Open(name); openErr == nil { h := sha256.New() if _, copyErr := io.Copy(h, f); copyErr != nil { return errors.Wrap(copyErr, "failed to copy to sha256") } f.Close() var out2 OutputID h.Sum(out2[:0]) if out == out2 { return nil } } // Hash did not match. Fall through and rewrite file. } // Copy file to cache directory. mode := os.O_RDWR | os.O_CREATE if err == nil && info.Size() > size { // shouldn't happen but fix in case mode |= os.O_TRUNC } f, err := os.OpenFile(name, mode, 0666) if err != nil { return err } defer f.Close() if size == 0 { // File now exists with correct size. // Only one possible zero-length file, so contents are OK too. // Early return here makes sure there's a "last byte" for code below. return nil } // From here on, if any of the I/O writing the file fails, // we make a best-effort attempt to truncate the file f // before returning, to avoid leaving bad bytes in the file. // Copy file to f, but also into h to double-check hash. if _, err = file.Seek(0, 0); err != nil { _ = f.Truncate(0) return err } h := sha256.New() w := io.MultiWriter(f, h) if _, err = io.CopyN(w, file, size-1); err != nil { _ = f.Truncate(0) return err } // Check last byte before writing it; writing it will make the size match // what other processes expect to find and might cause them to start // using the file. buf := make([]byte, 1) if _, err = file.Read(buf); err != nil { _ = f.Truncate(0) return err } if n, wErr := h.Write(buf); n != len(buf) { return fmt.Errorf("wrote to hash %d/%d bytes with error %s", n, len(buf), wErr) } sum := h.Sum(nil) if !bytes.Equal(sum, out[:]) { _ = f.Truncate(0) return fmt.Errorf("file content changed underfoot") } // Commit cache file entry. if _, err = f.Write(buf); err != nil { _ = f.Truncate(0) return err } if err = f.Close(); err != nil { // Data might not have been written, // but file may look like it is the right size. // To be extra careful, remove cached file. os.Remove(name) return err } if err = os.Chtimes(name, c.now(), c.now()); err != nil { // mainly for tests return errors.Wrapf(err, "failed to change time of file %s", name) } return nil }