main.go raw

   1  // mxcorpus generates a paired AST/IR/ASM/BIN corpus from Moxie source packages.
   2  //
   3  // For each package, it produces:
   4  //   - Source segments (.mx) extracted from original source by declaration
   5  //   - LLVM IR segments (.ll) at four optimization levels (O0, O1, O2, Os)
   6  //   - Assembly segments (.s) at four optimization levels
   7  //   - Binary segments (.bin.hex) at four optimization levels
   8  //   - DWARF line info (.lineinfo) mapping binary offsets to source lines
   9  //   - A manifest.json mapping all segments with positions and metadata
  10  //
  11  // Usage:
  12  //
  13  //	mxcorpus -o <outdir> [-verify] <directory...>         # AST only
  14  //	mxcorpus -o <outdir> -ir <import-path> <directory...>  # AST + IR/ASM/BIN/lineinfo
  15  package main
  16  
  17  import (
  18  	"encoding/json"
  19  	"flag"
  20  	"fmt"
  21  	"go/parser"
  22  	"go/token"
  23  	"os"
  24  	"path/filepath"
  25  	"sort"
  26  	"strings"
  27  
  28  	"moxie/mxtext"
  29  )
  30  
  31  var (
  32  	flagVerify bool
  33  	flagIR     string
  34  	flagModDir string
  35  )
  36  
  37  func main() {
  38  	outdir := flag.String("o", "corpus", "output directory")
  39  	flag.BoolVar(&flagVerify, "verify", false, "verify segment concatenation")
  40  	flag.StringVar(&flagIR, "ir", "", "generate IR for this import path (e.g. unicode/utf8)")
  41  	flag.StringVar(&flagModDir, "moddir", "", "module root directory for non-stdlib packages")
  42  	flag.Parse()
  43  
  44  	// AST extraction from directories.
  45  	var pkgDirs []string
  46  	for _, dir := range flag.Args() {
  47  		dirs, err := processDir(dir, *outdir)
  48  		if err != nil {
  49  			fmt.Fprintf(os.Stderr, "%s: %v\n", dir, err)
  50  			os.Exit(1)
  51  		}
  52  		pkgDirs = append(pkgDirs, dirs...)
  53  	}
  54  
  55  	// IR + ASM + BIN + lineinfo + pairing from import path.
  56  	if flagIR != "" {
  57  		pkgDir := ""
  58  		irBase := filepath.Base(flagIR)
  59  		for _, d := range pkgDirs {
  60  			if filepath.Base(d) == irBase {
  61  				pkgDir = d
  62  				break
  63  			}
  64  		}
  65  		if pkgDir == "" {
  66  			if len(pkgDirs) == 1 {
  67  				pkgDir = pkgDirs[0]
  68  			} else {
  69  				fmt.Fprintf(os.Stderr, "ir: cannot determine package directory for %q among %v\n", flagIR, pkgDirs)
  70  				os.Exit(1)
  71  			}
  72  		}
  73  		if err := extractIR(flagIR, pkgDir); err != nil {
  74  			fmt.Fprintf(os.Stderr, "ir: %v\n", err)
  75  			os.Exit(1)
  76  		}
  77  		if err := extractASM(pkgDir); err != nil {
  78  			fmt.Fprintf(os.Stderr, "asm: %v\n", err)
  79  			os.Exit(1)
  80  		}
  81  		if err := extractBIN(pkgDir); err != nil {
  82  			fmt.Fprintf(os.Stderr, "bin: %v (non-fatal)\n", err)
  83  		}
  84  		if err := extractLineInfo(pkgDir); err != nil {
  85  			fmt.Fprintf(os.Stderr, "lineinfo: %v (non-fatal)\n", err)
  86  		}
  87  		if err := enrichManifest(flagIR, pkgDir); err != nil {
  88  			fmt.Fprintf(os.Stderr, "pair: %v\n", err)
  89  			os.Exit(1)
  90  		}
  91  	}
  92  }
  93  
  94  func processDir(dir, outdir string) ([]string, error) {
  95  	entries, err := os.ReadDir(dir)
  96  	if err != nil {
  97  		return nil, err
  98  	}
  99  
 100  	fset := token.NewFileSet()
 101  	pkgFiles := map[string][]parsedFile{}
 102  
 103  	for _, e := range entries {
 104  		if e.IsDir() || !strings.HasSuffix(e.Name(), ".mx") {
 105  			continue
 106  		}
 107  		if strings.HasSuffix(e.Name(), "_test.mx") {
 108  			continue
 109  		}
 110  
 111  		path := filepath.Join(dir, e.Name())
 112  		src, err := os.ReadFile(path)
 113  		if err != nil {
 114  			return nil, fmt.Errorf("read %s: %w", path, err)
 115  		}
 116  
 117  		origSrc := append([]byte(nil), src...)
 118  
 119  		chanRes := mxtext.RewriteChanLiterals(src, fset)
 120  		sliceRes := mxtext.RewriteSliceLiterals(chanRes.Src, fset)
 121  		rewritten := sliceRes.Src
 122  
 123  		f, err := parser.ParseFile(fset, e.Name(), rewritten, parser.ParseComments)
 124  		if err != nil {
 125  			return nil, fmt.Errorf("parse %s: %w", path, err)
 126  		}
 127  
 128  		pf := parsedFile{
 129  			name:    e.Name(),
 130  			origSrc: origSrc,
 131  			src:     rewritten,
 132  			file:    f,
 133  		}
 134  		pkgFiles[f.Name.Name] = append(pkgFiles[f.Name.Name], pf)
 135  	}
 136  
 137  	dirBase := filepath.Base(dir)
 138  	var result []string
 139  
 140  	for pkgName, files := range pkgFiles {
 141  		outName := dirBase
 142  		if pkgName != dirBase && pkgName != "main" {
 143  			outName = pkgName
 144  		} else if pkgName == "main" && len(pkgFiles) > 1 {
 145  			outName = dirBase + "_gen"
 146  		}
 147  
 148  		pkgDir := filepath.Join(outdir, outName)
 149  		manifest := &PackageManifest{
 150  			Package: pkgName,
 151  		}
 152  
 153  		for fileIdx, pf := range files {
 154  			fm, err := extractAST(fset, pf, fileIdx+1, pkgDir)
 155  			if err != nil {
 156  				return nil, fmt.Errorf("ast %s: %w", pf.name, err)
 157  			}
 158  			manifest.Files = append(manifest.Files, fm)
 159  		}
 160  
 161  		manifestPath := filepath.Join(pkgDir, "manifest.json")
 162  		data, err := json.MarshalIndent(manifest, "", "  ")
 163  		if err != nil {
 164  			return nil, err
 165  		}
 166  		if err := os.WriteFile(manifestPath, data, 0644); err != nil {
 167  			return nil, err
 168  		}
 169  
 170  		n := 0
 171  		for _, f := range manifest.Files {
 172  			n += len(f.Segments)
 173  		}
 174  		fmt.Printf("%s: %d files, %d segments\n", outName, len(manifest.Files), n)
 175  
 176  		if flagVerify {
 177  			if err := verifyConcat(files, pkgDir); err != nil {
 178  				return nil, err
 179  			}
 180  		}
 181  		result = append(result, pkgDir)
 182  	}
 183  
 184  	return result, nil
 185  }
 186  
 187  func verifyConcat(files []parsedFile, pkgDir string) error {
 188  	for _, pf := range files {
 189  		segDir := filepath.Join(pkgDir, pf.name+".segments")
 190  		entries, err := os.ReadDir(segDir)
 191  		if err != nil {
 192  			return fmt.Errorf("verify %s: %w", pf.name, err)
 193  		}
 194  
 195  		var names []string
 196  		for _, e := range entries {
 197  			if strings.HasSuffix(e.Name(), ".mx") {
 198  				names = append(names, e.Name())
 199  			}
 200  		}
 201  		sort.Strings(names)
 202  
 203  		var concat []byte
 204  		for _, n := range names {
 205  			data, err := os.ReadFile(filepath.Join(segDir, n))
 206  			if err != nil {
 207  				return err
 208  			}
 209  			concat = append(concat, data...)
 210  		}
 211  
 212  		if string(concat) != string(pf.src) {
 213  			return fmt.Errorf("verify %s: segment concat (%d bytes) != rewritten source (%d bytes)", pf.name, len(concat), len(pf.src))
 214  		}
 215  		fmt.Printf("  verify %s: PASS (%d bytes)\n", pf.name, len(pf.src))
 216  	}
 217  	return nil
 218  }
 219