package iskra

import (
	"bytes"
	"os"
)

// BenchFunc describes one function or method for which a timed benchmark
// block is generated.
type BenchFunc struct {
	Name        string     // unqualified function or method name
	RecvType    string     // receiver type as written (may start with '*'); empty for plain functions
	ParamTypes  []string   // parameter types, in call order
	ParamNames  []string   // parameter names; missing or empty entries fall back to p<index>
	ResultTypes []string   // result types; only the count is used when emitting the call
	IsMethod    bool       // true when the source record is a method
	Class       BenchClass // BenchComplex selects the Nc iteration count, BenchSkip suppresses emission
	SizeDriver  int32      // index of the parameter scaled across input sizes; negative means none
}

// Iteration counts written into the generated program as the constants Ns
// (simple functions) and Nc (complex functions). They are kept as strings
// because they are only ever spliced into generated source text.
const benchItersSimple = "10000000"
const benchItersComplex = "100000"

// EmitBenchFile returns the source text of a standalone "package main"
// benchmark program covering funcs.
//
// pkgName is the identifier used to qualify calls and types from the package
// under test. importPath is added to the generated import block when it is
// non-empty. Entries whose Class is BenchSkip are omitted; entries with a
// non-negative SizeDriver get one timed block per input size, all others get
// a single timed block.
func EmitBenchFile(pkgName string, importPath string, funcs []BenchFunc) []byte {
	var out []byte
	out = append(out, "// Generated by iskra bench-gen\n"...)
	out = append(out, "package main\n\n"...)
	out = append(out, "import (\n"...)
	out = append(out, "\t\"fmt\"\n"...)
	out = append(out, "\t\"time\"\n"...)
	if importPath != "" {
		out = append(out, "\t\"" | importPath | "\"\n"...)
	}
	out = append(out, ")\n\n"...)
	out = append(out, "const Ns = " | benchItersSimple | "\n"...)
	out = append(out, "const Nc = " | benchItersComplex | "\n\n"...)
	out = append(out, "var sink int64\n\n"...)
	out = append(out, "func main() {\n"...)
	for _, f := range funcs {
		if f.Class == BenchSkip {
			continue
		}
		if f.SizeDriver >= 0 {
			emitSizedBench(&out, f, pkgName)
		} else {
			emitSimpleBench(&out, f, pkgName)
		}
	}
	// Reference sink once more so the generated program always uses it.
	out = append(out, "\t_ = sink\n"...)
	out = append(out, "}\n"...)
	return out
}

// iterVar returns the name of the generated iteration-count constant for f.
func iterVar(f BenchFunc) string {
	if f.Class == BenchComplex {
		return "Nc"
	}
	return "Ns"
}

// iterLabel returns the iteration count for f as text, for the report line.
func iterLabel(f BenchFunc) string {
	if f.Class == BenchComplex {
		return benchItersComplex
	}
	return benchItersSimple
}

// emitSimpleBench appends one braced block that sets up default arguments
// for f and times it.
func emitSimpleBench(out *[]byte, f BenchFunc, pkg string) {
	label := benchLabel(f, "")
	*out = append(*out, "\t{\n"...)
	if f.IsMethod && f.RecvType != "" {
		emitRecvSetup(out, f.RecvType, pkg)
	}
	// A size-driver index of -1 never equals a parameter index, so every
	// parameter receives its default value.
	emitParamSetup(out, f.ParamTypes, f.ParamNames, -1, 0, pkg)
	emitTimedLoop(out, f, label, pkg)
	*out = append(*out, "\t}\n"...)
}

// emitSizedBench appends one timed block per input size (1, 10, 100, 1000),
// scaling the parameter at index f.SizeDriver and defaulting the rest.
func emitSizedBench(out *[]byte, f BenchFunc, pkg string) {
	sizes := []int32{1, 10, 100, 1000}
	for _, sz := range sizes {
		label := benchLabel(f, intToStr(int(sz)))
		*out = append(*out, "\t{\n"...)
		if f.IsMethod && f.RecvType != "" {
			emitRecvSetup(out, f.RecvType, pkg)
		}
		emitParamSetup(out, f.ParamTypes, f.ParamNames, f.SizeDriver, sz, pkg)
		emitTimedLoop(out, f, label, pkg)
		*out = append(*out, "\t}\n"...)
	}
}

// benchLabel builds the name printed in the report: "Name" for functions,
// "Recv.Name" for methods (receiver '*' stripped), with "/N=<size>" appended
// when sizeSuffix is non-empty.
func benchLabel(f BenchFunc, sizeSuffix string) string {
	name := f.Name
	if f.IsMethod && f.RecvType != "" {
		rt := f.RecvType
		if len(rt) > 0 && rt[0] == '*' {
			rt = rt[1:]
		}
		name = rt | "." | f.Name
	}
	if sizeSuffix != "" {
		return name | "/N=" | sizeSuffix
	}
	return name
}

// emitRecvSetup appends the declaration of the generated variable "recv": a
// pointer to a zero composite literal for pointer receivers, a zero-valued
// variable otherwise. The receiver type is qualified with pkg.
func emitRecvSetup(out *[]byte, recvType string, pkg string) {
	clean := recvType
	isPtr := false
	if len(clean) > 0 && clean[0] == '*' {
		isPtr = true
		clean = clean[1:]
	}
	qualified := pkg | "." | clean
	if isPtr {
		*out = append(*out, "\t\trecv := &" | qualified | "{}\n"...)
	} else {
		*out = append(*out, "\t\tvar recv " | qualified | "\n"...)
	}
}

// benchParamVarName returns the generated variable name for parameter i: the
// declared name when one is present and non-empty, otherwise "p<i>". Setup
// and call emission both use it so the two always agree.
func benchParamVarName(names []string, i int) string {
	if i < len(names) && names[i] != "" {
		return names[i]
	}
	return "p" | intToStr(i)
}

// emitParamSetup appends one variable declaration per parameter. The
// parameter at index sizeDriver is initialised from size; all others get a
// fixed default. Types are qualified with pkg first.
func emitParamSetup(out *[]byte, types []string, names []string, sizeDriver int32, size int32, pkg string) {
	for i, typ := range types {
		varName := benchParamVarName(names, i)
		qtyp := qualifyType(typ, pkg)
		if int32(i) == sizeDriver {
			emitSizedParam(out, varName, qtyp, size)
		} else {
			emitDefaultParam(out, varName, qtyp)
		}
	}
}

// emitDefaultParam appends a declaration giving name a fixed value suitable
// for typ. Unrecognised slice types get an empty literal, pointer types get
// the address of a zero literal, and anything else a zero-valued var.
//
// NOTE(review): "int"/"int32" share an untyped 42 and "uint"/"uint32" share
// uint32(42); this presumably relies on the target dialect treating each
// pair as the same type — confirm. Likewise "string" is initialised from a
// []byte conversion.
func emitDefaultParam(out *[]byte, name string, typ string) {
	switch typ {
	case "int", "int32":
		*out = append(*out, "\t\t" | name | " := 42\n"...)
	case "int64":
		*out = append(*out, "\t\t" | name | " := int64(42)\n"...)
	case "uint", "uint32":
		*out = append(*out, "\t\t" | name | " := uint32(42)\n"...)
	case "uint64":
		*out = append(*out, "\t\t" | name | " := uint64(42)\n"...)
	case "uint8", "byte":
		*out = append(*out, "\t\t" | name | " := byte('a')\n"...)
	case "rune":
		*out = append(*out, "\t\t" | name | " := rune('a')\n"...)
	case "float64":
		*out = append(*out, "\t\t" | name | " := 3.14\n"...)
	case "float32":
		*out = append(*out, "\t\t" | name | " := float32(3.14)\n"...)
	case "bool":
		*out = append(*out, "\t\t" | name | " := true\n"...)
	case "string", "[]byte":
		*out = append(*out, "\t\t" | name | " := []byte(\"hello, world\")\n"...)
	case "[][]byte":
		*out = append(*out, "\t\t" | name | " := [][]byte{[]byte(\"hello\"), []byte(\"world\")}\n"...)
	case "error":
		*out = append(*out, "\t\tvar " | name | " error\n"...)
	default:
		if len(typ) > 2 && typ[:2] == "[]" {
			*out = append(*out, "\t\t" | name | " := " | typ | "{}\n"...)
		} else if len(typ) > 0 && typ[0] == '*' {
			*out = append(*out, "\t\t" | name | " := &" | typ[1:] | "{}\n"...)
		} else {
			*out = append(*out, "\t\tvar " | name | " " | typ | "\n"...)
		}
	}
}

// emitSizedParam appends a declaration whose value scales with size: numeric
// types are set to size itself, byte strings and slices are allocated with
// size elements. Any other type falls back to the bare number.
func emitSizedParam(out *[]byte, name string, typ string, size int32) {
	szStr := intToStr(int(size))
	switch typ {
	case "int", "int32":
		*out = append(*out, "\t\t" | name | " := " | szStr | "\n"...)
	case "int64":
		*out = append(*out, "\t\t" | name | " := int64(" | szStr | ")\n"...)
	case "uint", "uint32":
		*out = append(*out, "\t\t" | name | " := uint32(" | szStr | ")\n"...)
	case "uint64":
		*out = append(*out, "\t\t" | name | " := uint64(" | szStr | ")\n"...)
	case "float64":
		*out = append(*out, "\t\t" | name | " := float64(" | szStr | ")\n"...)
	case "float32":
		*out = append(*out, "\t\t" | name | " := float32(" | szStr | ")\n"...)
	case "string", "[]byte":
		// Allocate size bytes, then fill with a repeating a..z pattern so the
		// input is not all zeroes.
		*out = append(*out, "\t\t" | name | " := []byte{:" | szStr | "}\n"...)
		*out = append(*out, "\t\tfor j := range " | name | " { " | name | "[j] = byte('a' + j%26) }\n"...)
	default:
		if len(typ) > 2 && typ[:2] == "[]" {
			*out = append(*out, "\t\t" | name | " := " | typ | "{:" | szStr | "}\n"...)
		} else {
			*out = append(*out, "\t\t" | name | " := " | szStr | "\n"...)
		}
	}
}

// emitTimedLoop appends the timed loop that calls f (discarding its results)
// followed by the report line "<label>\t<iterations>\t<ns>.<hundredths> ns/op".
func emitTimedLoop(out *[]byte, f BenchFunc, label string, pkg string) {
	nVar := iterVar(f)
	*out = append(*out, "\t\tt0 := time.Now()\n"...)
	*out = append(*out, "\t\tfor j := 0; j < " | nVar | "; j++ {\n"...)
	*out = append(*out, "\t\t\t"...)

	// Discard results with one blank identifier per result value.
	nResults := len(f.ResultTypes)
	if nResults == 1 {
		*out = append(*out, "_ = "...)
	} else if nResults > 1 {
		for ri := 0; ri < nResults; ri++ {
			if ri > 0 {
				*out = append(*out, ", "...)
			}
			*out = append(*out, "_"...)
		}
		*out = append(*out, " = "...)
	}

	if f.IsMethod && f.RecvType != "" {
		*out = append(*out, "recv." | f.Name | "("...)
	} else {
		*out = append(*out, pkg | "." | f.Name | "("...)
	}
	for i := range f.ParamTypes {
		if i > 0 {
			*out = append(*out, ", "...)
		}
		*out = append(*out, benchParamVarName(f.ParamNames, i)...)
	}
	*out = append(*out, ")\n"...)
	*out = append(*out, "\t\t\tsink++\n"...)
	*out = append(*out, "\t\t}\n"...)

	nLabel := iterLabel(f)
	*out = append(*out, "\t\telapsed := time.Since(t0).Nanoseconds()\n"...)
	*out = append(*out, "\t\tnsop := elapsed / int64(" | nVar | ")\n"...)
	*out = append(*out, "\t\tsubns := (elapsed * 100 / int64(" | nVar | ")) % 100\n"...)
	// subns holds hundredths of a nanosecond (0..99). Its two digits are
	// printed separately so values below 10 keep their leading zero;
	// printing subns as-is would render 12.05 ns/op as "12.5".
	*out = append(*out, "\t\tfmt.Println(\"" | label | "\\t" | nLabel | "\\t\" | fmt.Sprint(nsop) | \".\" | fmt.Sprint(subns/10) | fmt.Sprint(subns%10) | \" ns/op\")\n"...)
}

// qualifyType prefixes an exported type name with pkg, looking through one
// leading "[]" or "*". Types that do not start with an upper-case ASCII
// letter are returned unchanged, as is everything when typ or pkg is empty.
func qualifyType(typ string, pkg string) string {
	if len(typ) == 0 || pkg == "" {
		return typ
	}
	// Slice of custom type: []Foo -> []pkg.Foo
	if len(typ) > 2 && typ[:2] == "[]" {
		inner := typ[2:]
		if needsQualification(inner) {
			return "[]" | pkg | "." | inner
		}
		return typ
	}
	// Pointer to custom type: *Foo -> *pkg.Foo
	if typ[0] == '*' {
		inner := typ[1:]
		if needsQualification(inner) {
			return "*" | pkg | "." | inner
		}
		return typ
	}
	if needsQualification(typ) {
		return pkg | "." | typ
	}
	return typ
}

// needsQualification reports whether name starts with an upper-case ASCII
// letter, i.e. looks like an exported type that must be package-qualified.
//
// An earlier special case for "Reader", "Writer" and "ReadWriter" returned
// the same result as the general path and has been dropped.
func needsQualification(name string) bool {
	if len(name) == 0 {
		return false
	}
	return name[0] >= 'A' && name[0] <= 'Z'
}

// benchFuncName returns the de-duplication key for f: "Name" for functions
// and "Recv_Name" for methods (receiver '*' stripped).
func benchFuncName(f BenchFunc) string {
	name := f.Name
	if !f.IsMethod || f.RecvType == "" {
		return name
	}
	rt := f.RecvType
	if len(rt) > 0 && rt[0] == '*' {
		rt = rt[1:]
	}
	return rt | "_" | name
}

// ExtractBenchFuncsFlat walks the AST-stage function and method records of t
// and returns a BenchFunc for each one that can be benchmarked.
//
// A record is skipped when it has no content or database record, when the
// function or its receiver type does not start with an upper-case ASCII
// letter, when any parameter type cannot be synthesised, or when an entry
// with the same benchFuncName key was already collected.
func ExtractBenchFuncsFlat(t *Tree) []BenchFunc {
	var funcs []BenchFunc
	seen := map[string]bool{}
	for i := range t.RecMeta {
		meta := &t.RecMeta[i]
		if meta.StageTag != StageAST {
			continue
		}
		if meta.Kind != KindFunc && meta.Kind != KindMethod {
			continue
		}
		astContent := t.GetContent(uint32(i))
		if len(astContent) == 0 {
			continue
		}
		rec := t.db.GetRecord(uint32(i))
		if rec == nil {
			continue
		}
		fullName := FormFromRecord(rec, t.StringPool)
		funcName := unqualifiedName(fullName)
		if len(funcName) == 0 || funcName[0] < 'A' || funcName[0] > 'Z' {
			continue
		}

		st := ExtractSymbols(string(astContent))
		cls := ClassifyBenchCost(string(astContent), funcName)

		// Parse the names once; expandParamTypes only needs their count, and
		// sanitising afterwards keeps that count unchanged.
		paramNames := extractParamNamesFromAST(astContent)
		paramTypes := expandParamTypes(st.ParamTypes, paramNames)
		if hasUnsynthesizableParam(paramTypes) {
			continue
		}
		sanitizeParamNames(paramNames)

		if meta.Kind == KindMethod && st.RecvType != "" {
			rt := st.RecvType
			if len(rt) > 0 && rt[0] == '*' {
				rt = rt[1:]
			}
			if len(rt) == 0 || rt[0] < 'A' || rt[0] > 'Z' {
				continue
			}
		}

		resultTypes := expandResultTypes(st.ResultTypes, astContent)
		bf := BenchFunc{
			Name:        funcName,
			RecvType:    st.RecvType,
			ParamTypes:  paramTypes,
			ParamNames:  paramNames,
			ResultTypes: resultTypes,
			IsMethod:    meta.Kind == KindMethod,
			Class:       cls,
			SizeDriver:  detectSizeDriver(astContent, paramTypes),
		}
		key := benchFuncName(bf)
		if seen[key] {
			continue
		}
		seen[key] = true
		funcs = append(funcs, bf)
	}
	return funcs
}

// hasUnsynthesizableParam reports whether any parameter type is one the
// generator cannot produce a value for: function types, interface types,
// anything containing a '.', and a fixed list of abstract type names.
func hasUnsynthesizableParam(types []string) bool {
	for _, t := range types {
		tb := []byte(t)
		if bytes.Contains(tb, []byte("func(")) {
			return true
		}
		// "interface{" also matches the exact type "interface{}".
		if t == "any" || bytes.Contains(tb, []byte("interface{")) {
			return true
		}
		// A '.' covers cross-package types (including io.*) as well as
		// variadic "..." parameters.
		if bytes.ContainsAny(tb, ".") {
			return true
		}
		// Skip well-known interface/abstract types
		switch t {
		case "Reader", "Writer", "ReadWriter", "ReadCloser", "WriteCloser",
			"ReadWriteCloser", "Closer", "ByteReader", "ByteWriter",
			"ByteScanner", "RuneReader", "RuneScanner", "Seeker",
			"ReadSeeker", "WriteSeeker", "ReadWriteSeeker", "ReaderFrom",
			"WriterTo", "ReaderAt", "WriterAt", "Block", "AEAD", "Hash",
			"Image", "Model":
			return true
		}
	}
	return false
}

// sanitizeParamNames renames, in place, parameters whose names would clash
// with identifiers the generated benchmark scaffold declares or relies on,
// by appending "0".
//
// "j" is the generated loop variable; "recv", "t0", "elapsed", "nsop" and
// "subns" are locals declared in the same generated block; "sink", "Ns",
// "Nc", "fmt" and "time" are package-level names the scaffold references
// after the parameters are declared. "b" and "i" were already renamed by
// the original list and are kept.
func sanitizeParamNames(names []string) {
	for i, n := range names {
		switch n {
		case "b", "i", "j",
			"recv", "t0", "elapsed", "nsop", "subns",
			"sink", "Ns", "Nc", "fmt", "time":
			names[i] = n | "0"
		}
	}
}

// expandParamTypes aligns types with names when the AST groups
// multiple names under one type (e.g. "err,target error" -> 1 type, 2 names).
func expandParamTypes(types []string, names []string) []string { if len(types) == 0 || len(names) <= len(types) { return types } expanded := []string{:0:len(names)} ti := 0 ni := 0 for ti < len(types) && ni < len(names) { remaining := len(names) - ni typesLeft := len(types) - ti count := remaining - typesLeft + 1 if count < 1 { count = 1 } for c := 0; c < count && ni < len(names); c++ { expanded = append(expanded, types[ti]) ni++ } ti++ } return expanded } func expandResultTypes(types []string, astDump []byte) []string { resultNames := extractResultNamesFromAST(astDump) if len(resultNames) <= len(types) { return types } return expandParamTypes(types, resultNames) } func extractResultNamesFromAST(astDump []byte) []string { lines := bytes.Split(astDump, []byte("\n")) inResults := false var names []string for _, line := range lines { trimmed := bytes.TrimSpace(line) if bytes.Equal(trimmed, []byte("Results")) { inResults = true continue } if inResults { if len(trimmed) == 0 || (trimmed[0] != ' ' && !bytes.HasPrefix(line, []byte(" "))) { if !bytes.HasPrefix(trimmed, []byte(" ")) { break } } parts := bytes.Fields(trimmed) if len(parts) >= 2 { namesPart := parts[0] for _, n := range bytes.Split(namesPart, []byte(",")) { names = append(names, string(n)) } } else if len(parts) == 1 { names = append(names, "_") } } } return names } func extractParamNamesFromAST(astDump []byte) []string { lines := bytes.Split(astDump, []byte("\n")) inParams := false var names []string for _, line := range lines { trimmed := bytes.TrimSpace(line) if bytes.Equal(trimmed, []byte("Params")) { inParams = true continue } if inParams { if len(trimmed) == 0 || (trimmed[0] != ' ' && !bytes.HasPrefix(line, []byte(" "))) { if !bytes.HasPrefix(trimmed, []byte(" ")) { break } } parts := bytes.Fields(trimmed) if len(parts) >= 2 { namesPart := parts[0] for _, n := range bytes.Split(namesPart, []byte(",")) { names = append(names, string(n)) } } } } return names } // ImportPathFromManifest extracts the Go 
import path from a corpus // manifest.csv by parsing IR filenames like "unicode_utf8_RuneBytes.O0.ll" // into "unicode/utf8". Returns pkgName, importPath. func ImportPathFromManifest(manifestPath string) (string, string) { data, err := os.ReadFile(manifestPath) if err != nil { return "", "" } lines := bytes.Split(data, []byte("\n")) for _, line := range lines { fields := bytes.Split(line, []byte("\t")) if len(fields) < 6 { continue } kind := string(fields[1]) if kind != "func" { continue } irFile := string(fields[5]) if len(irFile) == 0 { continue } funcName := string(fields[2]) // IR filename: crypto_aes_NewCipher.O0.ll // Strip ".O0.ll" suffix, then strip "_FuncName" suffix to get path prefix base := irFile if dotIdx := bytes.IndexByte([]byte(base), '.'); dotIdx >= 0 { base = base[:dotIdx] } // Strip _FuncName suffix if len(base) > len(funcName)+1 { prefix := base[:len(base)-len(funcName)-1] // Replace _ with / to reconstruct import path importPath := bytes.Replace([]byte(prefix), []byte("_"), []byte("/"), -1) // Package name is last segment pkgName := prefix if lastUnderscore := bytes.LastIndexByte([]byte(prefix), '_'); lastUnderscore >= 0 { pkgName = prefix[lastUnderscore+1:] } return pkgName, string(importPath) } } return "", "" } func detectSizeDriver(astDump []byte, paramTypes []string) int32 { lines := bytes.Split(astDump, []byte("\n")) paramNames := extractParamNamesFromAST(astDump) for _, line := range lines { trimmed := bytes.TrimSpace(line) if !bytes.HasPrefix(trimmed, []byte("Range")) && !bytes.HasPrefix(trimmed, []byte("For")) { continue } for i, pn := range paramNames { if bytes.Contains(trimmed, []byte(pn)) { return int32(i) } } } for i, pt := range paramTypes { if pt == "[]byte" || pt == "string" { return int32(i) } } return -1 }