cc.go raw

   1  package builder
   2  
   3  // This file implements a wrapper around the C compiler (Clang) which uses a
   4  // build cache.
   5  
   6  import (
   7  	"crypto/sha512"
   8  	"encoding/hex"
   9  	"encoding/json"
  10  	"errors"
  11  	"fmt"
  12  	"io"
  13  	"io/fs"
  14  	"os"
  15  	"path/filepath"
  16  	"sort"
  17  	"strings"
  18  	"unicode"
  19  
  20  	"moxie/goenv"
  21  	"tinygo.org/x/go-llvm"
  22  )
  23  
  24  // compileAndCacheCFile compiles a C or assembly file using a build cache.
  25  // Compiling the same file again (if nothing changed, including included header
  26  // files) the output is loaded from the build cache instead.
  27  //
  28  // Its operation is a bit complex (more complex than Go package build caching)
  29  // because the list of file dependencies is only known after the file is
  30  // compiled. However, luckily compilers have a flag to write a list of file
  31  // dependencies in Makefile syntax which can be used for caching.
  32  //
  33  // Because of this complexity, every file has in fact two cached build outputs:
  34  // the file itself, and the list of dependencies. Its operation is as follows:
  35  //
  36  //	depfile = hash(path, compiler, cflags, ...)
  37  //	if depfile exists:
  38  //	  outfile = hash of all files and depfile name
  39  //	  if outfile exists:
  40  //	    # cache hit
  41  //	    return outfile
  42  //	# cache miss
  43  //	tmpfile = compile file
  44  //	read dependencies (side effect of compile)
  45  //	write depfile
  46  //	outfile = hash of all files and depfile name
  47  //	rename tmpfile to outfile
  48  //
  49  // There are a few edge cases that are not handled:
  50  //   - If a file is added to an include path, that file may be included instead of
  51  //     some other file. This would be fixed by also including lookup failures in the
  52  //     dependencies file, but I'm not aware of a compiler which does that.
  53  //   - The Makefile syntax that compilers output has issues, see readDepFile for
  54  //     details.
  55  //   - A header file may be changed to add/remove an include. This invalidates the
  56  //     depfile but without invalidating its name. For this reason, the depfile is
  57  //     written on each new compilation (even when it seems unnecessary). However, it
  58  //     could in rare cases lead to a stale file fetched from the cache.
  59  func compileAndCacheCFile(abspath, tmpdir string, cflags []string, printCommands func(string, ...string)) (string, error) {
  60  	// Hash input file.
  61  	fileHash, err := hashFile(abspath)
  62  	if err != nil {
  63  		return "", err
  64  	}
  65  
  66  	// Acquire a lock (if supported).
  67  	unlock := lock(filepath.Join(goenv.Get("GOCACHE"), fileHash+".c.lock"))
  68  	defer unlock()
  69  
  70  	// Create cache key for the dependencies file.
  71  	buf, err := json.Marshal(struct {
  72  		Path        string
  73  		Hash        string
  74  		Flags       []string
  75  		LLVMVersion string
  76  	}{
  77  		Path:        abspath,
  78  		Hash:        fileHash,
  79  		Flags:       cflags,
  80  		LLVMVersion: llvm.Version,
  81  	})
  82  	if err != nil {
  83  		panic(err) // shouldn't happen
  84  	}
  85  	depfileNameHashBuf := sha512.Sum512_224(buf)
  86  	depfileNameHash := hex.EncodeToString(depfileNameHashBuf[:])
  87  
  88  	// Load dependencies file, if possible.
  89  	depfileName := "dep-" + depfileNameHash + ".json"
  90  	depfileCachePath := filepath.Join(goenv.Get("GOCACHE"), depfileName)
  91  	depfileBuf, err := os.ReadFile(depfileCachePath)
  92  	var dependencies []string // sorted list of dependency paths
  93  	if err == nil {
  94  		// There is a dependency file, that's great!
  95  		// Parse it first.
  96  		err := json.Unmarshal(depfileBuf, &dependencies)
  97  		if err != nil {
  98  			return "", fmt.Errorf("could not parse dependencies JSON: %w", err)
  99  		}
 100  
 101  		// Obtain hashes of all the files listed as a dependency.
 102  		outpath, err := makeCFileCachePath(dependencies, depfileNameHash)
 103  		if err == nil {
 104  			if _, err := os.Stat(outpath); err == nil {
 105  				return outpath, nil
 106  			} else if !errors.Is(err, fs.ErrNotExist) {
 107  				return "", err
 108  			}
 109  		}
 110  	} else if !errors.Is(err, fs.ErrNotExist) {
 111  		// expected either nil or IsNotExist
 112  		return "", err
 113  	}
 114  
 115  	objTmpFile, err := os.CreateTemp(goenv.Get("GOCACHE"), "tmp-*.bc")
 116  	if err != nil {
 117  		return "", err
 118  	}
 119  	objTmpFile.Close()
 120  	depTmpFile, err := os.CreateTemp(tmpdir, "dep-*.d")
 121  	if err != nil {
 122  		return "", err
 123  	}
 124  	depTmpFile.Close()
 125  	flags := append([]string{}, cflags...)                                                 // copy cflags
 126  	flags = append(flags, "-MD", "-MV", "-MTdeps", "-MF", depTmpFile.Name(), "-flto=thin") // autogenerate dependencies
 127  	flags = append(flags, "-c", "-o", objTmpFile.Name(), abspath)
 128  	if strings.ToLower(filepath.Ext(abspath)) == ".s" {
 129  		// If this is an assembly file (.s or .S, lowercase or uppercase), then
 130  		// we'll need to add -Qunused-arguments because many parameters are
 131  		// relevant to C, not assembly. And with -Werror, having meaningless
 132  		// flags (for the assembler) is a compiler error.
 133  		flags = append(flags, "-Qunused-arguments")
 134  	}
 135  	if printCommands != nil {
 136  		printCommands("clang", flags...)
 137  	}
 138  	err = runCCompiler(flags...)
 139  	if err != nil {
 140  		return "", &commandError{"failed to build", abspath, err}
 141  	}
 142  
 143  	// Create sorted and uniqued slice of dependencies.
 144  	dependencyPaths, err := readDepFile(depTmpFile.Name())
 145  	if err != nil {
 146  		return "", err
 147  	}
 148  	dependencyPaths = append(dependencyPaths, abspath) // necessary for .s files
 149  	dependencySet := make(map[string]struct{}, len(dependencyPaths))
 150  	var dependencySlice []string
 151  	for _, path := range dependencyPaths {
 152  		if _, ok := dependencySet[path]; ok {
 153  			continue
 154  		}
 155  		dependencySet[path] = struct{}{}
 156  		dependencySlice = append(dependencySlice, path)
 157  	}
 158  	sort.Strings(dependencySlice)
 159  
 160  	// Write dependencies file.
 161  	f, err := os.CreateTemp(filepath.Dir(depfileCachePath), depfileName)
 162  	if err != nil {
 163  		return "", err
 164  	}
 165  
 166  	buf, err = json.MarshalIndent(dependencySlice, "", "\t")
 167  	if err != nil {
 168  		panic(err) // shouldn't happen
 169  	}
 170  	_, err = f.Write(buf)
 171  	if err != nil {
 172  		return "", err
 173  	}
 174  	err = f.Close()
 175  	if err != nil {
 176  		return "", err
 177  	}
 178  	err = os.Rename(f.Name(), depfileCachePath)
 179  	if err != nil {
 180  		return "", err
 181  	}
 182  
 183  	// Move temporary object file to final location.
 184  	outpath, err := makeCFileCachePath(dependencySlice, depfileNameHash)
 185  	if err != nil {
 186  		return "", err
 187  	}
 188  	err = os.Rename(objTmpFile.Name(), outpath)
 189  	if err != nil {
 190  		return "", err
 191  	}
 192  
 193  	return outpath, nil
 194  }
 195  
 196  // Create a cache path (a path in GOCACHE) to store the output of a compiler
 197  // job. This path is based on the dep file name (which is a hash of metadata
 198  // including compiler flags) and the hash of all input files in the paths slice.
 199  func makeCFileCachePath(paths []string, depfileNameHash string) (string, error) {
 200  	// Hash all input files.
 201  	fileHashes := make(map[string]string, len(paths))
 202  	for _, path := range paths {
 203  		hash, err := hashFile(path)
 204  		if err != nil {
 205  			return "", err
 206  		}
 207  		fileHashes[path] = hash
 208  	}
 209  
 210  	// Calculate a cache key based on the above hashes.
 211  	buf, err := json.Marshal(struct {
 212  		DepfileHash string
 213  		FileHashes  map[string]string
 214  	}{
 215  		DepfileHash: depfileNameHash,
 216  		FileHashes:  fileHashes,
 217  	})
 218  	if err != nil {
 219  		panic(err) // shouldn't happen
 220  	}
 221  	outFileNameBuf := sha512.Sum512_224(buf)
 222  	cacheKey := hex.EncodeToString(outFileNameBuf[:])
 223  
 224  	outpath := filepath.Join(goenv.Get("GOCACHE"), "obj-"+cacheKey+".bc")
 225  	return outpath, nil
 226  }
 227  
 228  // hashFile hashes the given file path and returns the hash as a hex string.
 229  func hashFile(path string) (string, error) {
 230  	f, err := os.Open(path)
 231  	if err != nil {
 232  		return "", fmt.Errorf("failed to hash file: %w", err)
 233  	}
 234  	defer f.Close()
 235  	fileHasher := sha512.New512_224()
 236  	_, err = io.Copy(fileHasher, f)
 237  	if err != nil {
 238  		return "", fmt.Errorf("failed to hash file: %w", err)
 239  	}
 240  	return hex.EncodeToString(fileHasher.Sum(nil)), nil
 241  }
 242  
 243  // readDepFile reads a dependency file in NMake (Visual Studio make) format. The
 244  // file is assumed to have a single target named deps.
 245  //
 246  // There are roughly three make syntax variants:
 247  //   - BSD make, which doesn't support any escaping. This means that many special
 248  //     characters are not supported in file names.
 249  //   - GNU make, which supports escaping using a backslash but when it fails to
 250  //     find a file it tries to fall back with the literal path name (to match BSD
 251  //     make).
 252  //   - NMake (Visual Studio) and Jom, which simply quote the string if there are
 253  //     any weird characters.
 254  //
 255  // Clang supports two variants: a format that's a compromise between BSD and GNU
 256  // make (and is buggy to match GCC which is equally buggy), and NMake/Jom, which
 257  // is at least somewhat sane. This last format isn't perfect either: it does not
 258  // correctly handle filenames with quote marks in them. Those are generally not
 259  // allowed on Windows, but of course can be used on POSIX like systems. Still,
 260  // it's the most sane of any of the formats so readDepFile will use that format.
 261  func readDepFile(filename string) ([]string, error) {
 262  	buf, err := os.ReadFile(filename)
 263  	if err != nil {
 264  		return nil, err
 265  	}
 266  	if len(buf) == 0 {
 267  		return nil, nil
 268  	}
 269  	return parseDepFile(string(buf))
 270  }
 271  
 272  func parseDepFile(s string) ([]string, error) {
 273  	// This function makes no attempt at parsing anything other than Clang -MD
 274  	// -MV output.
 275  
 276  	// For Windows: replace CRLF with LF to make the logic below simpler.
 277  	s = strings.ReplaceAll(s, "\r\n", "\n")
 278  
 279  	// Collapse all lines ending in a backslash. These backslashes are really
 280  	// just a way to continue a line without making very long lines.
 281  	s = strings.ReplaceAll(s, "\\\n", " ")
 282  
 283  	// Only use the first line, which is expected to begin with "deps:".
 284  	line := strings.SplitN(s, "\n", 2)[0]
 285  	if !strings.HasPrefix(line, "deps:") {
 286  		return nil, errors.New("readDepFile: expected 'deps:' prefix")
 287  	}
 288  	line = strings.TrimSpace(line[len("deps:"):])
 289  
 290  	var deps []string
 291  	for line != "" {
 292  		if line[0] == '"' {
 293  			// File path is quoted. Path ends with double quote.
 294  			// This does not handle double quotes in path names, which is a
 295  			// problem on non-Windows systems.
 296  			line = line[1:]
 297  			end := strings.IndexByte(line, '"')
 298  			if end < 0 {
 299  				return nil, errors.New("readDepFile: path is incorrectly quoted")
 300  			}
 301  			dep := line[:end]
 302  			line = strings.TrimSpace(line[end+1:])
 303  			deps = append(deps, dep)
 304  		} else {
 305  			// File path is not quoted. Path ends in space or EOL.
 306  			end := strings.IndexFunc(line, unicode.IsSpace)
 307  			if end < 0 {
 308  				// last dependency
 309  				deps = append(deps, line)
 310  				break
 311  			}
 312  			dep := line[:end]
 313  			line = strings.TrimSpace(line[end:])
 314  			deps = append(deps, dep)
 315  		}
 316  	}
 317  	return deps, nil
 318  }
 319