package iskra

import "bytes"

// ASTGen produces the same indented text AST dump format as mxcorpus's
// astdump.go, but from raw Moxie source text without using go/parser.

// SplitDecls splits a source file into individual top-level declarations.
//
// Whitespace, comments, the package clause, import declarations and lines
// that do not start with func/type/var/const are skipped. Each returned
// slice aliases src and starts at the declaration keyword; comments that
// precede a declaration are NOT part of the returned slice.
func SplitDecls(src []byte) [][]byte {
	var decls [][]byte
	i := 0
	for i < len(src) {
		// Skip whitespace and comments.
		for i < len(src) {
			if src[i] == ' ' || src[i] == '\t' || src[i] == '\r' || src[i] == '\n' {
				i++
			} else if i+1 < len(src) && src[i] == '/' && src[i+1] == '/' {
				// Line comment: drop the whole line including its newline.
				for i < len(src) && src[i] != '\n' {
					i++
				}
				if i < len(src) {
					i++
				}
			} else if i+1 < len(src) && src[i] == '/' && src[i+1] == '*' {
				i += 2
				for i+1 < len(src) {
					if src[i] == '*' && src[i+1] == '/' {
						i += 2
						break
					}
					i++
				}
			} else {
				break
			}
		}
		if i >= len(src) {
			break
		}

		// Skip package and import statements.
		if hasWordAt(src, i, "package") {
			for i < len(src) && src[i] != '\n' {
				i++
			}
			continue
		}
		if hasWordAt(src, i, "import") {
			i += 6
			for i < len(src) && (src[i] == ' ' || src[i] == '\t') {
				i++
			}
			if i < len(src) && src[i] == '(' {
				// Grouped import: skip to the matching ')'.
				depth := 0
				for i < len(src) {
					if src[i] == '(' {
						depth++
					} else if src[i] == ')' {
						depth--
						if depth == 0 {
							i++
							break
						}
					}
					i++
				}
			} else {
				for i < len(src) && src[i] != '\n' {
					i++
				}
			}
			continue
		}

		// This is a declaration (func, type, var, const) or an unknown line.
		start := i
		if hasWordAt(src, i, "func") || hasWordAt(src, i, "type") || hasWordAt(src, i, "var") || hasWordAt(src, i, "const") {
			i = findDeclEnd(src, i)
			decls = append(decls, src[start:i])
		} else {
			// Skip unknown line.
			for i < len(src) && src[i] != '\n' {
				i++
			}
			if i < len(src) {
				i++
			}
		}
	}
	return decls
}

// hasWordAt reports whether word occurs in src at offset pos and is not
// immediately followed by an identifier byte (so "func" does not match
// "function"). The byte before pos is not inspected.
func hasWordAt(src []byte, pos int32, word string) bool {
	if pos+len(word) > len(src) {
		return false
	}
	if string(src[pos:pos+len(word)]) != word {
		return false
	}
	after := pos + len(word)
	if after < len(src) && isIdent(src[after]) {
		return false
	}
	return true
}

// findDeclEnd returns the offset just past the declaration that begins at
// start. A declaration ends at the '}' that brings brace and paren depth back
// to zero, or at a newline seen while both depths are zero. String, rune and
// raw-string literals and comments are skipped so that brackets inside them
// do not count. The result never exceeds len(src).
func findDeclEnd(src []byte, start int32) int32 {
	i := start
	braceDepth := 0
	parenDepth := 0
	inString := false
	var strQuote byte
	for i < len(src) {
		b := src[i]
		if inString {
			if b == '\\' && strQuote != '`' {
				// Skip the escaped byte; may step past the end, clamped below.
				i += 2
				continue
			}
			if b == strQuote {
				inString = false
			}
			i++
			continue
		}
		if b == '/' && i+1 < len(src) && src[i+1] == '/' {
			for i < len(src) && src[i] != '\n' {
				i++
			}
			continue
		}
		if b == '/' && i+1 < len(src) && src[i+1] == '*' {
			i += 2
			for i+1 < len(src) {
				if src[i] == '*' && src[i+1] == '/' {
					i += 2
					break
				}
				i++
			}
			continue
		}
		if b == '"' || b == '\'' || b == '`' {
			inString = true
			strQuote = b
			i++
			continue
		}
		if b == '(' {
			parenDepth++
		} else if b == ')' {
			parenDepth--
		} else if b == '{' {
			braceDepth++
		} else if b == '}' {
			braceDepth--
			if braceDepth == 0 && parenDepth == 0 {
				i++
				return i
			}
		} else if b == '\n' && braceDepth == 0 && parenDepth == 0 {
			// Single-line decl (like var x int32).
			// If the next non-blank line starts a new top-level decl or a
			// line comment, the current decl ends here.
			if i+1 < len(src) {
				next := i + 1
				for next < len(src) && (src[next] == ' ' || src[next] == '\t' || src[next] == '\r') {
					next++
				}
				if next < len(src) && src[next] != '\n' {
					if hasWordAt(src, next, "func") || hasWordAt(src, next, "type") ||
						hasWordAt(src, next, "var") || hasWordAt(src, next, "const") ||
						(src[next] == '/' && next+1 < len(src) && src[next+1] == '/') {
						return i
					}
				}
			}
			// For now, single-line decl ends at newline once we are more
			// than four bytes past the start.
			if i > start+4 {
				return i
			}
		}
		i++
	}
	// A trailing backslash inside an unterminated literal can push i one
	// past the end; clamp so callers can slice src[start:i] safely.
	if i > len(src) {
		i = len(src)
	}
	return i
}

// DeclName extracts the name from a declaration.
//
// For functions and methods it is the function name (a parenthesised
// receiver is skipped), for type declarations the type name, and for var and
// const declarations the first declared name (also for grouped forms).
// It returns "" when decl does not start with one of those keywords.
func DeclName(decl []byte) string {
	g := &astGen{src: decl, pos: 0}
	g.skipSpaceAndNewlines()
	// Skip doc comments
	for g.pos < len(g.src) && g.src[g.pos] == '/' {
		for g.pos < len(g.src) && g.src[g.pos] != '\n' {
			g.pos++
		}
		g.skipSpaceAndNewlines()
	}
	if g.matchWord("func") {
		g.skipSpace()
		if g.peek() == '(' {
			// Method: func (recv Type) Name(...)
			g.skipBalanced('(', ')')
			g.skipSpace()
		}
		return g.readIdent()
	}
	if g.matchWord("type") {
		g.skipSpace()
		return g.readIdent()
	}
	if g.matchWord("var") || g.matchWord("const") {
		g.skipSpace()
		if g.peek() == '(' {
			// Grouped - return first name
			g.pos++
			g.skipSpaceAndNewlines()
			return g.readIdent()
		}
		return g.readIdent()
	}
	return ""
}

// GenAST takes the source of a single declaration and produces an AST dump.
//
// The dump is indented text, two spaces per nesting level. Blocks nested
// deeper than 8 levels are skipped. Input that does not start with
// func/type/var/const yields an empty result.
func GenAST(src []byte) []byte {
	g := &astGen{src: src, out: []byte{:0:len(src)}, maxDepth: 8}
	g.skipSpace()
	if g.matchWord("func") {
		g.genFuncDecl()
	} else if g.matchWord("type") {
		g.genTypeDecl()
	} else if g.matchWord("var") {
		g.genVarDecl()
	} else if g.matchWord("const") {
		g.genConstDecl()
	}
	return g.out
}

// astGen is a cursor over one declaration's source plus the dump being built.
type astGen struct {
	src      []byte // source text being scanned
	pos      int32  // current read offset into src
	out      []byte // accumulated dump text
	maxDepth int32  // genBlock skips blocks whose depth exceeds this
}

// --- output helpers ---

// indent appends two spaces per level of depth to the output.
func (g *astGen) indent(depth int32) {
	for i := 0; i < depth; i++ {
		g.out = append(g.out, ' ', ' ')
	}
}

// line appends s as one output line indented to depth.
func (g *astGen) line(depth int32, s string) {
	g.indent(depth)
	g.out = append(g.out, s...)
	g.out = append(g.out, '\n')
}

// lineRefs appends prefix as one output line indented to depth, followed by
// " [r1,r2,...]" when refs is non-empty.
func (g *astGen) lineRefs(depth int32, prefix string, refs []string) {
	g.indent(depth)
	g.out = append(g.out, prefix...)
	if len(refs) > 0 {
		g.out = append(g.out, " ["...)
		for i, r := range refs {
			if i > 0 {
				g.out = append(g.out, ',')
			}
			g.out = append(g.out, r...)
		}
		g.out = append(g.out, ']')
	}
	g.out = append(g.out, '\n')
}

// --- tokenizer helpers ---

// eof reports whether the cursor is at or past the end of the source.
func (g *astGen) eof() bool {
	return g.pos >= len(g.src)
}

// peek returns the byte under the cursor without consuming it, or 0 at EOF.
func (g *astGen) peek() byte {
	if g.eof() {
		return 0
	}
	return g.src[g.pos]
}

// next consumes and returns the byte under the cursor. At EOF it returns 0
// and leaves the cursor unchanged, mirroring peek.
func (g *astGen) next() byte {
	if g.eof() {
		return 0
	}
	b := g.src[g.pos]
	g.pos++
	return b
}

// skipSpace advances past spaces, tabs, carriage returns and comments.
// Newlines are not consumed; a line comment is skipped up to (not including)
// its terminating newline.
func (g *astGen) skipSpace() {
	for g.pos < len(g.src) {
		b := g.src[g.pos]
		if b == ' ' || b == '\t' || b == '\r' {
			g.pos++
		} else if b == '/' && g.pos+1 < len(g.src) && g.src[g.pos+1] == '/' {
			for g.pos < len(g.src) && g.src[g.pos] != '\n' {
				g.pos++
			}
		} else if b == '/' && g.pos+1 < len(g.src) && g.src[g.pos+1] == '*' {
			g.pos += 2
			for g.pos+1 < len(g.src) {
				if g.src[g.pos] == '*' && g.src[g.pos+1] == '/' {
					g.pos += 2
					break
				}
				g.pos++
			}
		} else {
			break
		}
	}
}

// skipSpaceAndNewlines is skipSpace that also consumes newlines.
func (g *astGen) skipSpaceAndNewlines() {
	for g.pos < len(g.src) {
		b := g.src[g.pos]
		if b == ' ' || b == '\t' || b == '\r' || b == '\n' {
			g.pos++
		} else if b == '/' && g.pos+1 < len(g.src) && g.src[g.pos+1] == '/' {
			for g.pos < len(g.src) && g.src[g.pos] != '\n' {
				g.pos++
			}
		} else if b == '/' && g.pos+1 < len(g.src) && g.src[g.pos+1] == '*' {
			g.pos += 2
			for g.pos+1 < len(g.src) {
				if g.src[g.pos] == '*' && g.src[g.pos+1] == '/' {
					g.pos += 2
					break
				}
				g.pos++
			}
		} else {
			break
		}
	}
}

// matchWord skips whitespace, newlines and comments, then consumes w if it
// appears at the cursor as a whole word. It reports whether w was consumed.
// The skipped whitespace stays consumed even when w does not match.
func (g *astGen) matchWord(w string) bool {
	g.skipSpaceAndNewlines()
	if g.pos+len(w) > len(g.src) {
		return false
	}
	if string(g.src[g.pos:g.pos+len(w)]) != w {
		return false
	}
	after := g.pos + len(w)
	if after < len(g.src) && isIdent(g.src[after]) {
		return false
	}
	g.pos = after
	return true
}

// readIdent skips spaces/comments and consumes a run of identifier bytes.
// It returns "" when no identifier byte is found. Note that the run may
// start with a digit, because only isIdent is checked.
func (g *astGen) readIdent() string {
	g.skipSpace()
	start := g.pos
	for g.pos < len(g.src) && isIdent(g.src[g.pos]) {
		g.pos++
	}
	if g.pos == start {
		return ""
	}
	return string(g.src[start:g.pos])
}

// isIdent reports whether b is an ASCII letter, digit or underscore.
func isIdent(b byte) bool {
	return (b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z') || (b >= '0' && b <= '9') || b == '_'
}

// isIdentStart reports whether b is an ASCII letter or underscore.
func isIdentStart(b byte) bool {
	return (b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z') || b == '_'
}

// readTypeExpr reads a type expression like []byte, *RuneIter, map[K]V, etc.
//
// It returns a normalised textual form: function types become "func(...)",
// interface bodies "interface{}" and struct bodies "struct{...}". It returns
// "" without consuming anything but leading space when no type is found.
func (g *astGen) readTypeExpr() string {
	g.skipSpace()
	if g.eof() {
		return ""
	}
	b := g.peek()
	if b == '*' {
		g.pos++
		return "*" | g.readTypeExpr()
	}
	if b == '[' {
		g.pos++
		g.skipSpace()
		if g.peek() == ']' {
			g.pos++
			return "[]" | g.readTypeExpr()
		}
		// array type [N]T
		lenExpr := g.readExprStr()
		g.expect(']')
		return "[" | lenExpr | "]" | g.readTypeExpr()
	}
	if b == '.' && g.pos+2 < len(g.src) && g.src[g.pos+1] == '.' && g.src[g.pos+2] == '.' {
		g.pos += 3
		return "..." | g.readTypeExpr()
	}
	if g.prefixMatch("map[") {
		g.pos += 4
		key := g.readTypeExpr()
		g.expect(']')
		val := g.readTypeExpr()
		return "map[" | key | "]" | val
	}
	if g.prefixMatch("chan ") {
		g.pos += 4
		return "chan " | g.readTypeExpr()
	}
	if g.prefixMatch("func(") || g.prefixMatch("func (") {
		g.pos += 4
		g.skipSpace()
		g.skipBalanced('(', ')')
		g.skipSpace()
		// Skip optional return type(s)
		if g.peek() == '(' {
			g.skipBalanced('(', ')')
		} else if !g.eof() && g.peek() != ',' && g.peek() != ')' && g.peek() != '{' && g.peek() != '\n' {
			g.readTypeExpr()
		}
		return "func(...)"
	}
	if g.prefixMatch("interface{") {
		g.pos += 9
		g.skipBalanced('{', '}')
		return "interface{}"
	}
	if g.prefixMatch("struct{") {
		g.pos += 6
		g.skipBalanced('{', '}')
		return "struct{...}"
	}
	if g.prefixMatch("struct {") {
		g.pos += 7
		g.skipBalanced('{', '}')
		return "struct{...}"
	}
	name := g.readIdent()
	if name == "" {
		return ""
	}
	// Check for pkg.Type
	g.skipSpace()
	if g.peek() == '.' {
		g.pos++
		sel := g.readIdent()
		return name | "." | sel
	}
	return name
}

// readExprStr consumes text up to an unbalanced closing bracket, or a comma
// or newline outside any brackets, and returns it with surrounding
// whitespace trimmed. The terminator itself is not consumed.
func (g *astGen) readExprStr() string {
	g.skipSpace()
	start := g.pos
	depth := 0
	for g.pos < len(g.src) {
		b := g.src[g.pos]
		if b == '(' || b == '[' || b == '{' {
			depth++
		} else if b == ')' || b == ']' || b == '}' {
			if depth == 0 {
				break
			}
			depth--
		} else if (b == ',' || b == '\n') && depth == 0 {
			break
		}
		g.pos++
	}
	return string(bytes.TrimSpace(g.src[start:g.pos]))
}

// expect skips spaces/comments and consumes b if it is the next byte;
// otherwise it does nothing further.
func (g *astGen) expect(b byte) {
	g.skipSpace()
	if g.pos < len(g.src) && g.src[g.pos] == b {
		g.pos++
	}
}

// prefixMatch reports whether the source at the cursor starts with s.
// Nothing is consumed.
func (g *astGen) prefixMatch(s string) bool {
	if g.pos+len(s) > len(g.src) {
		return false
	}
	return string(g.src[g.pos:g.pos+len(s)]) == s
}

// skipBalanced consumes a bracketed region that starts at the cursor with
// open and ends at the matching close. String, rune and raw-string literals
// inside are skipped. It does nothing when the cursor is not on open.
func (g *astGen) skipBalanced(open, close byte) {
	if g.peek() != open {
		return
	}
	depth := 0
	for g.pos < len(g.src) {
		b := g.src[g.pos]
		if b == '\'' || b == '"' || b == '`' {
			g.skipStringLit(b)
			continue
		}
		if b == open {
			depth++
		} else if b == close {
			depth--
			if depth == 0 {
				g.pos++
				return
			}
		}
		g.pos++
	}
}

// skipToBlockClose consumes everything up to and including the '}' that
// closes the block the cursor is currently inside. Unlike skipBalanced it
// does not require the cursor to sit on the opening brace. Comments and
// literals are skipped so braces inside them do not count.
func (g *astGen) skipToBlockClose() {
	depth := 0
	for {
		g.skipSpaceAndNewlines()
		if g.eof() {
			return
		}
		b := g.src[g.pos]
		if b == '\'' || b == '"' || b == '`' {
			g.skipStringLit(b)
			continue
		}
		g.pos++
		if b == '{' {
			depth++
		} else if b == '}' {
			if depth == 0 {
				return
			}
			depth--
		}
	}
}

// astJoinNames joins names with "," and no surrounding spaces.
func astJoinNames(names []string) string {
	joined := ""
	for i, n := range names {
		if i > 0 {
			joined = joined | ","
		}
		joined = joined | n
	}
	return joined
}

// --- declaration generators ---

// genFuncDecl emits a "FuncDecl name [recv=T]" header followed by optional
// Params, Results and Block sections. The cursor must be just past "func".
func (g *astGen) genFuncDecl() {
	g.skipSpace()

	// Check for receiver: func (recv Type) Name(...)
	recv := ""
	if g.peek() == '(' {
		saved := g.pos
		g.pos++
		g.skipSpace()
		// Try to read receiver; the receiver's own name is not reported.
		g.readIdent()
		g.skipSpace()
		recvType := g.readTypeExpr()
		g.skipSpace()
		if g.peek() == ')' {
			g.pos++
			recv = recvType
		} else {
			g.pos = saved
		}
	}

	g.skipSpace()
	funcName := g.readIdent()
	header := "FuncDecl " | funcName
	if recv != "" {
		header = header | " recv=" | recv
	}
	g.line(0, header)

	// Parse params
	g.skipSpace()
	if g.peek() == '(' {
		params := g.parseFieldList(false)
		if len(params) > 0 {
			g.line(1, "Params")
			for _, f := range params {
				g.line(2, f)
			}
		}
	}

	// Parse results
	g.skipSpace()
	if g.peek() == '(' {
		results := g.parseFieldList(true)
		if len(results) > 0 {
			g.line(1, "Results")
			for _, f := range results {
				g.line(2, f)
			}
		}
	} else if g.peek() != '{' && !g.eof() {
		// Single unnamed result
		typeName := g.readTypeExpr()
		if typeName != "" {
			g.line(1, "Results")
			g.line(2, typeName)
		}
	}

	// Parse body
	g.skipSpaceAndNewlines()
	if g.peek() == '{' {
		g.genBlock(1)
	}
}

// parseFieldList parses a parenthesised parameter or result list starting at
// the cursor and returns one string per field: "a,b T" for named groups, or
// just the type. With typesOnly every entry is read as a bare type.
//
// Each loop iteration is guaranteed to consume at least one byte, so input
// the type reader does not understand (for example "<-chan T") cannot stall
// the parser.
func (g *astGen) parseFieldList(typesOnly bool) []string {
	g.expect('(')
	var fields []string
	for {
		g.skipSpaceAndNewlines()
		if g.peek() == ')' {
			g.pos++
			break
		}
		if g.eof() {
			break
		}
		iterStart := g.pos

		if typesOnly {
			typeName := g.readTypeExpr()
			if typeName != "" {
				fields = append(fields, typeName)
			}
			g.skipSpace()
			if g.peek() == ',' {
				g.pos++
			}
			if g.pos == iterStart {
				g.pos++ // unrecognised byte: force progress
			}
			continue
		}

		// Try: name[,name...] type or just type
		saved := g.pos
		var names []string
		for {
			name := g.readIdent()
			if name == "" {
				break
			}
			names = append(names, name)
			g.skipSpace()
			if g.peek() == ',' {
				g.pos++
				g.skipSpace()
			} else {
				break
			}
		}

		g.skipSpace()
		p := g.peek()
		variadic := g.pos+2 < len(g.src) && g.src[g.pos+1] == '.' && g.src[g.pos+2] == '.'
		// Single dot after a name = qualified type (pkg.Type), not a param name.
		// Three dots = variadic (...type), treat as "name type" where the type starts with ...
		if len(names) == 1 && p == '.' && !variadic {
			g.pos = saved
			typeName := g.readTypeExpr()
			if typeName != "" {
				fields = append(fields, typeName)
			}
		} else if len(names) > 0 && (isIdentStart(p) || p == '*' || p == '[' || p == '.') {
			typeName := g.readTypeExpr()
			if typeName != "" {
				fields = append(fields, astJoinNames(names) | " " | typeName)
			} else {
				fields = append(fields, names...)
			}
		} else if len(names) > 0 {
			// Names with no following type: each one is itself a type.
			fields = append(fields, names...)
		} else {
			g.pos = saved
			typeName := g.readTypeExpr()
			if typeName != "" {
				fields = append(fields, typeName)
			}
		}

		g.skipSpace()
		if g.peek() == ',' {
			g.pos++
		}
		if g.pos == iterStart {
			g.pos++ // unrecognised byte: force progress
		}
	}
	return fields
}

// genTypeDecl emits "GenDecl type" and one Type entry per spec, handling
// both the single and the parenthesised grouped form.
func (g *astGen) genTypeDecl() {
	g.line(0, "GenDecl type")
	g.skipSpaceAndNewlines()
	if g.peek() == '(' {
		// Grouped type declaration
		g.pos++
		for {
			g.skipSpaceAndNewlines()
			if g.peek() == ')' {
				g.pos++
				break
			}
			if g.eof() {
				break
			}
			before := g.pos
			g.genTypeSpec(1)
			// genTypeSpec may leave the cursor on text it cannot parse
			// (e.g. the "= B" of an alias); drop the rest of that line so
			// the loop always advances.
			if g.pos == before {
				g.skipToStmtEnd()
				if g.pos == before {
					g.pos++
				}
			}
		}
	} else {
		g.genTypeSpec(1)
	}
}

// genTypeSpec emits one "Type name typeexpr" line at depth. Struct types are
// expanded: a "Type name struct{...}" line is followed by one line per field
// at depth+1. It emits nothing when no type name is found at the cursor.
func (g *astGen) genTypeSpec(depth int32) {
	name := g.readIdent()
	if name == "" {
		return
	}
	g.skipSpace()

	// Check if it's a struct - handle specially to preserve field info
	if g.prefixMatch("struct") {
		g.pos += 6
		g.skipSpaceAndNewlines()
		if g.peek() == '{' {
			g.line(depth, "Type " | name | " struct{...}")
			// Parse struct fields from the body
			g.pos++ // skip {
			for {
				g.skipSpaceAndNewlines()
				if g.peek() == '}' {
					g.pos++
					break
				}
				if g.eof() {
					break
				}
				// Read the field with a scratch cursor so that a failed read
				// does not disturb g.
				fg := &astGen{src: g.src, pos: g.pos}
				fieldName := fg.readIdent()
				if fieldName == "" {
					// Skip line
					for g.pos < len(g.src) && g.src[g.pos] != '\n' {
						g.pos++
					}
					continue
				}
				fg.skipSpace()
				fieldType := fg.readTypeExpr()
				g.pos = fg.pos
				if fieldType != "" {
					g.line(depth+1, fieldName | " " | fieldType)
				} else {
					g.line(depth+1, fieldName)
				}
				// Skip to end of line (tags, comments)
				for g.pos < len(g.src) && g.src[g.pos] != '\n' {
					g.pos++
				}
			}
			return
		}
	}

	typeExpr := g.readTypeExpr()
	g.line(depth, "Type " | name | " " | typeExpr)
}

// genValueSpecGroup emits the value specs of a var/const declaration at
// depth, handling both the single and the parenthesised grouped form.
func (g *astGen) genValueSpecGroup(depth int32) {
	g.skipSpaceAndNewlines()
	if g.peek() == '(' {
		g.pos++
		for {
			g.skipSpaceAndNewlines()
			if g.peek() == ')' {
				g.pos++
				break
			}
			if g.eof() {
				break
			}
			g.genValueSpec(depth)
		}
	} else {
		g.genValueSpec(depth)
	}
}

// genVarDecl emits a top-level "GenDecl var" and its value specs.
func (g *astGen) genVarDecl() {
	g.line(0, "GenDecl var")
	g.genValueSpecGroup(1)
}

// genConstDecl emits a top-level "GenDecl const" and its value specs.
func (g *astGen) genConstDecl() {
	g.line(0, "GenDecl const")
	g.genValueSpecGroup(1)
}

// genValueSpec emits one "Value a,b [type]" line at depth and then skips the
// initialiser, stopping at the end of the line or at an unbalanced ')'.
// When no name is found the rest of the line is skipped and nothing is
// emitted.
func (g *astGen) genValueSpec(depth int32) {
	var names []string
	for {
		name := g.readIdent()
		if name == "" {
			break
		}
		names = append(names, name)
		g.skipSpace()
		if g.peek() == ',' {
			g.pos++
			g.skipSpace()
		} else {
			break
		}
	}
	if len(names) == 0 {
		// Skip to next line
		for g.pos < len(g.src) && g.src[g.pos] != '\n' {
			g.pos++
		}
		return
	}

	nameStr := astJoinNames(names)

	// Check for type
	g.skipSpace()
	typ := ""
	p := g.peek()
	if isIdentStart(p) || p == '*' || p == '[' {
		// Accept the candidate only if it is followed by '=', a line end,
		// EOF or ')'; otherwise rewind.
		saved := g.pos
		tryType := g.readTypeExpr()
		g.skipSpace()
		if g.peek() == '=' || g.peek() == '\n' || g.eof() || g.peek() == ')' {
			typ = tryType
		} else {
			g.pos = saved
		}
	}

	if typ != "" {
		g.line(depth, "Value " | nameStr | " " | typ)
	} else {
		g.line(depth, "Value " | nameStr)
	}

	// Skip rest of line (value expression), respecting nested parens
	parenD := 0
	for g.pos < len(g.src) {
		b := g.src[g.pos]
		if b == '(' {
			parenD++
		} else if b == ')' {
			if parenD == 0 {
				break
			}
			parenD--
		} else if b == '\n' && parenD == 0 {
			break
		}
		g.pos++
	}
}

// --- block and statement generators ---

// genBlock emits "Block" at depth followed by the block's statements at
// depth+1. The cursor must be on the opening '{'. Blocks deeper than
// maxDepth are skipped without output, and after 500 statements the rest of
// the block is skipped.
func (g *astGen) genBlock(depth int32) {
	if depth > g.maxDepth {
		g.skipBalanced('{', '}')
		return
	}
	g.line(depth, "Block")
	g.expect('{')
	iters := 0
	for {
		g.skipSpaceAndNewlines()
		if g.peek() == '}' {
			g.pos++
			return
		}
		if g.eof() {
			return
		}
		iters++
		if iters > 500 {
			// The cursor is on a statement, not on '{', so skip to this
			// block's closing brace rather than using skipBalanced.
			g.skipToBlockClose()
			return
		}
		saved := g.pos
		g.genStmt(depth + 1)
		if g.pos == saved {
			// No progress: drop the line, or a single byte as a last resort.
			g.skipToStmtEnd()
			if g.pos == saved {
				g.pos++
			}
		}
	}
}

// genStmt emits one statement at depth, dispatching on its leading keyword;
// anything else is treated as an assignment or expression statement.
func (g *astGen) genStmt(depth int32) {
	g.skipSpaceAndNewlines()
	if g.eof() || g.peek() == '}' {
		return
	}

	// Try keywords first
	if g.matchWord("return") {
		g.genReturn(depth)
	} else if g.matchWord("if") {
		g.genIf(depth)
	} else if g.matchWord("for") {
		g.genFor(depth)
	} else if g.matchWord("switch") {
		g.genSwitch(depth)
	} else if g.matchWord("select") {
		g.genSelect(depth)
	} else if g.matchWord("case") {
		g.genCase(depth)
	} else if g.matchWord("default") {
		g.genDefault(depth)
	} else if g.matchWord("break") {
		g.line(depth, "break")
		g.skipToStmtEnd()
	} else if g.matchWord("continue") {
		g.line(depth, "continue")
		g.skipToStmtEnd()
	} else if g.matchWord("defer") {
		g.genDefer(depth)
	} else if g.matchWord("var") {
		g.genVarStmt(depth)
	} else if g.matchWord("type") {
		g.genTypeStmt(depth)
	} else if g.matchWord("const") {
		g.genConstStmt(depth)
	} else if g.matchWord("go") {
		g.genGoStmt(depth)
	} else {
		// Assignment or expression statement
		g.genAssignOrExpr(depth)
	}
}

// genReturn emits "Return" with the identifiers referenced by the returned
// expressions, if any.
func (g *astGen) genReturn(depth int32) {
	g.skipSpace()
	if g.peek() == '\n' || g.peek() == '}' || g.eof() {
		g.line(depth, "Return")
		return
	}
	// Collect refs from return expressions
	stmtText := g.readToStmtEnd()
	refs := collectRefsFromText(stmtText)
	g.lineRefs(depth, "Return", refs)
}

// genIf emits "If [refs]" at depth with its block at depth+1, followed by an
// optional "Else" line at depth and the else-block or chained if at depth+1.
func (g *astGen) genIf(depth int32) {
	// Read the condition (everything up to the opening {)
	condText := g.readToBlockOpen()
	refs := collectRefsFromText(condText)
	g.lineRefs(depth, "If", refs)

	g.skipSpaceAndNewlines()
	if g.peek() == '{' {
		g.genBlock(depth + 1)
	}

	// Check for else
	g.skipSpaceAndNewlines()
	if g.matchWord("else") {
		g.line(depth, "Else")
		g.skipSpaceAndNewlines()
		if g.matchWord("if") {
			g.genIf(depth + 1)
		} else if g.peek() == '{' {
			g.genBlock(depth + 1)
		}
	}
}

// genFor emits "For [refs]" for ordinary loops, or "Range x [refs]" when the
// loop header contains "range ", where x is the first identifier after
// "range". For range loops the refs after "range" are listed before those
// ahead of it. The body block is emitted at depth+1.
func (g *astGen) genFor(depth int32) {
	g.skipSpace()
	if g.peek() == '{' {
		g.line(depth, "For")
		g.genBlock(depth + 1)
		return
	}

	condText := g.readToBlockOpen()
	rangeIdx := bytes.Index(condText, []byte("range "))
	if rangeIdx >= 0 {
		rangeExpr := extractRangeExpr(condText)
		var refs []string
		refs = append(refs, collectRefsFromText(condText[rangeIdx+6:])...)
		refs = append(refs, collectRefsFromText(condText[:rangeIdx])...)
		if rangeExpr != "" {
			g.lineRefs(depth, "Range " | rangeExpr, refs)
		} else {
			g.lineRefs(depth, "Range", refs)
		}
	} else {
		// Regular for loop
		g.lineRefs(depth, "For", collectRefsFromText(condText))
	}

	g.skipSpaceAndNewlines()
	if g.peek() == '{' {
		g.genBlock(depth + 1)
	}
}

// genSwitch emits "Switch [refs]" at depth and the switch body at depth+1.
func (g *astGen) genSwitch(depth int32) {
	g.skipSpace()
	if g.peek() == '{' {
		g.line(depth, "Switch")
		g.genBlock(depth + 1)
		return
	}
	condText := g.readToBlockOpen()
	refs := collectRefsFromText(condText)
	g.lineRefs(depth, "Switch", refs)
	g.skipSpaceAndNewlines()
	if g.peek() == '{' {
		g.genBlock(depth + 1)
	}
}

// genSelect emits "Select" at depth and the select body at depth+1.
func (g *astGen) genSelect(depth int32) {
	g.line(depth, "Select")
	g.skipSpaceAndNewlines()
	if g.peek() == '{' {
		g.genBlock(depth + 1)
	}
}

// genCaseBody emits the statements of a case/default clause at stmtDepth,
// stopping (without consuming) at the next "case", "default" or '}'.
func (g *astGen) genCaseBody(stmtDepth int32) {
	for {
		g.skipSpaceAndNewlines()
		p := g.peek()
		if p == '}' || g.eof() {
			break
		}
		saved := g.pos
		if g.matchWord("case") || g.matchWord("default") {
			g.pos = saved
			break
		}
		g.pos = saved
		g.genStmt(stmtDepth)
		if g.pos == saved {
			g.skipToStmtEnd()
			if g.pos == saved {
				g.pos++
			}
		}
	}
}

// genCase emits "Case [refs]" at depth and the clause body at depth+1.
func (g *astGen) genCase(depth int32) {
	g.skipSpace()
	caseText := g.readToCaseEnd()
	refs := collectRefsFromText(caseText)
	g.lineRefs(depth, "Case", refs)
	g.genCaseBody(depth + 1)
}

// genDefault emits "Default" at depth and the clause body at depth+1.
func (g *astGen) genDefault(depth int32) {
	g.line(depth, "Default")
	g.skipSpace()
	if g.peek() == ':' {
		g.pos++
	}
	g.genCaseBody(depth + 1)
}

// genCallStmt emits "<label> <callee> [refs]" for a defer/go statement, where
// callee is the text before the first '('; when there is no such text only
// "<label> [refs]" is emitted.
func (g *astGen) genCallStmt(depth int32, label string) {
	g.skipSpace()
	stmtText := g.readToStmtEnd()
	// Extract function name from call
	funcName := ""
	parenIdx := bytes.IndexByte(stmtText, '(')
	if parenIdx > 0 {
		funcName = string(bytes.TrimSpace(stmtText[:parenIdx]))
	}
	refs := collectRefsFromText(stmtText)
	if funcName != "" {
		g.lineRefs(depth, label | " " | funcName, refs)
	} else {
		g.lineRefs(depth, label, refs)
	}
}

// genDefer emits a "Defer" line for a defer statement.
func (g *astGen) genDefer(depth int32) {
	g.genCallStmt(depth, "Defer")
}

// genGoStmt emits a "Go" line for a go statement.
func (g *astGen) genGoStmt(depth int32) {
	g.genCallStmt(depth, "Go")
}

// genVarStmt emits a function-local "GenDecl var" and its value specs.
func (g *astGen) genVarStmt(depth int32) {
	g.line(depth, "GenDecl var")
	g.genValueSpecGroup(depth + 1)
}

// genTypeStmt emits a function-local "GenDecl type" with a single type spec.
func (g *astGen) genTypeStmt(depth int32) {
	g.line(depth, "GenDecl type")
	g.skipSpaceAndNewlines()
	g.genTypeSpec(depth + 1)
}

// genConstStmt emits a function-local "GenDecl const" and its value specs.
func (g *astGen) genConstStmt(depth int32) {
	g.line(depth, "GenDecl const")
	g.genValueSpecGroup(depth + 1)
}

// genAssignOrExpr emits one of:
//
//	Assign <lhs> <op> [refs]   for assignments and short declarations
//	<expr> ++ / <expr> --      for increment/decrement statements
//	Expr <text> [refs]         for anything else, with the first call's
//	                           arguments collapsed to "(...)"
func (g *astGen) genAssignOrExpr(depth int32) {
	// Read the full statement text
	stmtText := g.readToStmtEnd()

	// Check if this is an assignment
	assignOp := findAssignOp(stmtText)
	if assignOp != "" {
		lhs := extractLHS(stmtText, assignOp)
		refs := collectRefsWithFuncLit(stmtText, assignOp)
		g.lineRefs(depth, "Assign " | lhs | " " | assignOp, refs)
		return
	}

	// Check for inc/dec
	trimmed := bytes.TrimSpace(stmtText)
	if len(trimmed) >= 2 {
		suffix := string(trimmed[len(trimmed)-2:])
		if suffix == "++" || suffix == "--" {
			exprPart := string(bytes.TrimSpace(trimmed[:len(trimmed)-2]))
			refs := collectRefsFromText(stmtText)
			g.lineRefs(depth, exprPart | " " | suffix, refs)
			return
		}
	}

	// Expression statement
	refs := collectRefsFromText(stmtText)
	exprName := normalizeCallArgs(string(bytes.TrimSpace(stmtText)))
	g.lineRefs(depth, "Expr " | exprName, refs)
}

// normalizeCallArgs replaces the first balanced parenthesised group in s
// with "(...)". s is returned unchanged when it has no '(' or the group is
// never closed.
func normalizeCallArgs(s string) string {
	idx := bytes.IndexByte([]byte(s), '(')
	if idx < 0 {
		return s
	}
	depth := 0
	for i := idx; i < len(s); i++ {
		if s[i] == '(' {
			depth++
		} else if s[i] == ')' {
			depth--
			if depth == 0 {
				return s[:idx] | "(...)" | s[i+1:]
			}
		}
	}
	return s
}

// --- text readers ---

// readToStmtEnd consumes and returns the text of the current statement. The
// statement ends at a newline or an unbalanced closing bracket outside any
// brackets (neither is consumed), or at a line comment outside any brackets
// (the comment is consumed but not returned).
func (g *astGen) readToStmtEnd() []byte {
	start := g.pos
	depth := 0
	for g.pos < len(g.src) {
		b := g.src[g.pos]
		if b == '(' || b == '[' || b == '{' {
			depth++
		} else if b == ')' || b == ']' || b == '}' {
			if depth == 0 {
				break
			}
			depth--
		} else if b == '\n' && depth == 0 {
			break
		}
		// Stop at line comments
		if b == '/' && g.pos+1 < len(g.src) && g.src[g.pos+1] == '/' && depth == 0 {
			result := g.src[start:g.pos]
			for g.pos < len(g.src) && g.src[g.pos] != '\n' {
				g.pos++
			}
			return result
		}
		// Skip string literals
		if b == '"' || b == '\'' || b == '`' {
			g.skipStringLit(b)
			continue
		}
		g.pos++
	}
	return g.src[start:g.pos]
}

// readToBlockOpen consumes and returns everything up to (not including) the
// next '{' that is outside parentheses. Line comments and literals are
// stepped over but remain part of the returned text.
func (g *astGen) readToBlockOpen() []byte {
	start := g.pos
	depth := 0
	for g.pos < len(g.src) {
		b := g.src[g.pos]
		if b == '{' && depth == 0 {
			break
		}
		if b == '(' {
			depth++
		} else if b == ')' {
			depth--
		}
		// Skip comments
		if b == '/' && g.pos+1 < len(g.src) && g.src[g.pos+1] == '/' {
			for g.pos < len(g.src) && g.src[g.pos] != '\n' {
				g.pos++
			}
			continue
		}
		// Skip string/char literals
		if b == '"' || b == '\'' || b == '`' {
			g.skipStringLit(b)
			continue
		}
		g.pos++
	}
	return g.src[start:g.pos]
}

// readToCaseEnd consumes and returns the text of a case expression. A ':'
// outside literals ends it and is consumed; a newline ends it and is not.
func (g *astGen) readToCaseEnd() []byte {
	start := g.pos
	for g.pos < len(g.src) {
		b := g.src[g.pos]
		if b == '\'' || b == '"' || b == '`' {
			g.skipStringLit(b)
			continue
		}
		if b == ':' {
			result := g.src[start:g.pos]
			g.pos++
			return result
		}
		if b == '\n' {
			break
		}
		g.pos++
	}
	return g.src[start:g.pos]
}

// skipToStmtEnd advances to the next newline (not consumed) or to EOF.
func (g *astGen) skipToStmtEnd() {
	for g.pos < len(g.src) && g.src[g.pos] != '\n' {
		g.pos++
	}
}

// skipStringLit consumes a literal whose opening quote is under the cursor.
// Backquoted literals run to the next backquote; otherwise a backslash
// escapes the following byte. The cursor never moves past len(g.src), even
// when the input ends in a dangling backslash.
func (g *astGen) skipStringLit(quote byte) {
	g.pos++ // skip opening quote
	if quote == '`' {
		for g.pos < len(g.src) && g.src[g.pos] != '`' {
			g.pos++
		}
	} else {
		for g.pos < len(g.src) && g.src[g.pos] != quote {
			if g.src[g.pos] == '\\' && g.pos+1 < len(g.src) {
				g.pos++ // step over the escaped byte as well
			}
			g.pos++
		}
	}
	if g.pos < len(g.src) {
		g.pos++ // skip closing quote
	}
}

// --- reference collection ---

// astBuiltins returns a fresh set of predeclared type, constant and function
// names that are never reported as references.
func astBuiltins() map[string]bool {
	return map[string]bool{
		"bool": true, "byte": true,
		"int8": true, "int16": true, "int32": true, "int64": true,
		"uint8": true, "uint16": true, "uint32": true, "uint64": true,
		"float32": true, "float64": true,
		"string": true, "rune": true, "error": true, "any": true,
		"true": true, "false": true, "nil": true,
		"len": true, "cap": true, "append": true, "copy": true,
		"delete": true, "close": true, "panic": true, "recover": true,
		"print": true, "println": true, "make": true, "new": true,
	}
}

// astKeywords returns a fresh set of language keywords that are never
// reported as references.
func astKeywords() map[string]bool {
	return map[string]bool{
		"func": true, "return": true, "if": true, "else": true,
		"for": true, "range": true, "switch": true, "case": true,
		"default": true, "select": true, "break": true, "continue": true,
		"defer": true, "go": true, "var": true, "const": true,
		"type": true, "struct": true, "interface": true, "map": true,
		"chan": true, "package": true, "import": true,
		"fallthrough": true, "goto": true,
	}
}

// collectRefsWithFuncLit collects references from an assignment statement.
// When the right-hand side is a function literal the order is: left-hand
// side, literal body, then literal parameters (de-duplicated). Otherwise it
// behaves exactly like collectRefsFromText on the whole statement.
func collectRefsWithFuncLit(text []byte, assignOp string) []string {
	opIdx := astLocateOp(text, assignOp)
	if opIdx < 0 {
		return collectRefsFromText(text)
	}
	rhs := bytes.TrimSpace(text[opIdx+len(assignOp):])
	if !bytes.HasPrefix(rhs, []byte("func(")) && !bytes.HasPrefix(rhs, []byte("func (")) {
		return collectRefsFromText(text)
	}
	bodyStart := bytes.IndexByte(rhs, '{')
	if bodyStart < 0 {
		return collectRefsFromText(text)
	}
	lhsPart := text[:opIdx]
	paramPart := rhs[:bodyStart]
	bodyPart := rhs[bodyStart:]
	seen := map[string]bool{}
	var refs []string
	addRefs := func(part []byte) {
		for _, r := range collectRefsFromText(part) {
			if !seen[r] {
				seen[r] = true
				refs = append(refs, r)
			}
		}
	}
	addRefs(lhsPart)
	addRefs(bodyPart)
	addRefs(paramPart)
	return refs
}

// collectRefsFromText returns the distinct identifiers in text in order of
// first appearance, excluding keywords, builtins and anything inside string,
// rune or raw-string literals. Numeric literals are skipped.
func collectRefsFromText(text []byte) []string {
	// Build the exclusion sets once per call, not once per identifier.
	builtins := astBuiltins()
	keywords := astKeywords()
	seen := map[string]bool{}
	var refs []string
	i := 0
	for i < len(text) {
		b := text[i]
		// Skip string/char/rune literals
		if b == '"' || b == '\'' || b == '`' {
			i++
			if b == '`' {
				for i < len(text) && text[i] != '`' {
					i++
				}
			} else {
				for i < len(text) && text[i] != b {
					if text[i] == '\\' {
						i++
					}
					i++
				}
			}
			if i < len(text) {
				i++
			}
			continue
		}
		// Skip numbers (decimal, hex digits, 'x'/'X' and '.')
		if b >= '0' && b <= '9' {
			for i < len(text) && (text[i] >= '0' && text[i] <= '9' || text[i] == 'x' || text[i] == 'X' ||
				text[i] >= 'a' && text[i] <= 'f' || text[i] >= 'A' && text[i] <= 'F' || text[i] == '.') {
				i++
			}
			continue
		}
		if isIdentStart(b) {
			start := i
			for i < len(text) && isIdent(text[i]) {
				i++
			}
			name := string(text[start:i])
			if !seen[name] && !builtins[name] && !keywords[name] {
				seen[name] = true
				refs = append(refs, name)
			}
			continue
		}
		i++
	}
	return refs
}

// astAssignOpIndex returns the offset of the first assignment operator in
// text that is outside brackets and outside string/rune literals, or -1 if
// there is none. Comparison operators (==, !=, <=, >=) are not assignments.
func astAssignOpIndex(text []byte) int32 {
	depth := 0
	i := 0
	for i < len(text) {
		b := text[i]
		if b == '(' || b == '[' || b == '{' {
			depth++
		} else if b == ')' || b == ']' || b == '}' {
			depth--
		}
		// Skip string literals
		if b == '"' || b == '\'' || b == '`' {
			i++
			if b == '`' {
				for i < len(text) && text[i] != '`' {
					i++
				}
			} else {
				for i < len(text) && text[i] != b {
					if text[i] == '\\' {
						i++
					}
					i++
				}
			}
			if i < len(text) {
				i++
			}
			continue
		}
		if depth == 0 && i+1 < len(text) {
			n := text[i+1]
			if b == '=' && n == '=' {
				// "==" is a comparison: step over both bytes so the second
				// '=' is not mistaken for a plain assignment.
				i += 2
				continue
			}
			if n == '=' && (b == ':' || b == '+' || b == '-' || b == '*' || b == '/' ||
				b == '%' || b == '&' || b == '|' || b == '^') {
				return i
			}
			if i+2 < len(text) && text[i+2] == '=' &&
				((b == '<' && n == '<') || (b == '>' && n == '>') || (b == '&' && n == '^')) {
				return i
			}
		}
		if depth == 0 && b == '=' {
			// Plain '=' but not the tail of '!=', '<=', '>=' or of a
			// compound operator.
			if i > 0 && (text[i-1] == '!' || text[i-1] == '<' || text[i-1] == '>' || text[i-1] == ':' ||
				text[i-1] == '+' || text[i-1] == '-' || text[i-1] == '*' || text[i-1] == '/' ||
				text[i-1] == '%' || text[i-1] == '&' || text[i-1] == '|' || text[i-1] == '^') {
				i++
				continue
			}
			return i
		}
		i++
	}
	return -1
}

// astAssignOpAt returns the operator spelling that starts at idx, where idx
// is a non-negative result of astAssignOpIndex for the same text.
func astAssignOpAt(text []byte, idx int32) string {
	b := text[idx]
	if b == '=' {
		return "="
	}
	if b == '<' {
		return "<<="
	}
	if b == '>' {
		return ">>="
	}
	if b == '&' && text[idx+1] == '^' {
		return "&^="
	}
	return string(text[idx : idx+2])
}

// astLocateOp returns the offset of op in text. It prefers the position of
// the real assignment operator (outside brackets and literals); when that
// operator is not op it falls back to the first textual occurrence of op.
// It returns -1 when op does not occur.
func astLocateOp(text []byte, op string) int32 {
	idx := astAssignOpIndex(text)
	if op != "" && idx >= 0 && bytes.HasPrefix(text[idx:], []byte(op)) {
		return idx
	}
	return bytes.Index(text, []byte(op))
}

// findAssignOp returns the assignment operator of a statement (":=", "=",
// "+=", "<<=", "&^=", ...) or "" when the statement is not an assignment.
// Operators inside brackets or string/rune literals are ignored.
func findAssignOp(text []byte) string {
	idx := astAssignOpIndex(text)
	if idx < 0 {
		return ""
	}
	return astAssignOpAt(text, idx)
}

// extractLHS returns the text to the left of the assignment operator op,
// trimmed, with whitespace around commas removed. It returns "" when op
// does not occur in text.
func extractLHS(text []byte, op string) string {
	idx := astLocateOp(text, op)
	if idx < 0 {
		return ""
	}
	lhs := bytes.TrimSpace(text[:idx])
	// Normalize: remove spaces around commas (mxcorpus uses "a,b" not "a, b")
	parts := bytes.Split(lhs, []byte(","))
	for i := range parts {
		parts[i] = bytes.TrimSpace(parts[i])
	}
	return string(bytes.Join(parts, []byte(",")))
}

// extractRangeExpr returns the first identifier following "range " in text.
// If there is no identifier it returns the trimmed remainder, and "" when
// text does not contain "range ".
func extractRangeExpr(text []byte) string {
	idx := bytes.Index(text, []byte("range "))
	if idx < 0 {
		return ""
	}
	rest := bytes.TrimSpace(text[idx+6:])
	// Extract the identifier refs from the range expression
	var parts []string
	i := 0
	for i < len(rest) {
		if isIdentStart(rest[i]) {
			start := i
			for i < len(rest) && isIdent(rest[i]) {
				i++
			}
			parts = append(parts, string(rest[start:i]))
			continue
		}
		i++
	}
	if len(parts) > 0 {
		return parts[0]
	}
	return string(rest)
}