astgen.mx raw

   1  package iskra
   2  
   3  import "bytes"
   4  
   5  // ASTGen produces the same indented text AST dump format as mxcorpus's
   6  // astdump.go, but from raw Moxie source text without using go/parser.
   7  
// SplitDecls splits a source file into individual top-level declarations.
//
// The scan is purely textual. Whitespace, block comments, line comments, the
// package clause and import declarations are skipped. Each span that begins
// with func, type, var or const and ends at the offset reported by
// findDeclEnd is appended to the result as a sub-slice of src (no copy is
// made). Any other line is skipped.
func SplitDecls(src []byte) [][]byte {
	var decls [][]byte
	i := 0

	for i < len(src) {
		// Skip whitespace and comments
		for i < len(src) {
			if src[i] == ' ' || src[i] == '\t' || src[i] == '\r' || src[i] == '\n' {
				i++
			} else if i+1 < len(src) && src[i] == '/' && src[i+1] == '/' {
				// Line comment: consume it, then look at the line that follows.
				commentStart := i
				for i < len(src) && src[i] != '\n' {
					i++
				}
				if i < len(src) {
					i++
				}
				// Peek: skip horizontal whitespace on the next line to see
				// whether that line is blank.
				saved := i
				for i < len(src) && (src[i] == ' ' || src[i] == '\t' || src[i] == '\r') {
					i++
				}
				if i < len(src) && src[i] != '\n' {
					// Non-blank follows. Rewind to the comment and leave this
					// loop so the code below handles the comment line.
					i = commentStart
					break
				}
				// A blank line (or end of input) follows: the comment stays skipped.
				i = saved
			} else if i+1 < len(src) && src[i] == '/' && src[i+1] == '*' {
				// Block comment: skip through the closing "*/" if there is one.
				i += 2
				for i+1 < len(src) {
					if src[i] == '*' && src[i+1] == '/' {
						i += 2
						break
					}
					i++
				}
			} else {
				break
			}
		}
		if i >= len(src) {
			break
		}

		// Skip package and import statements
		if hasWordAt(src, i, "package") {
			for i < len(src) && src[i] != '\n' {
				i++
			}
			continue
		}
		if hasWordAt(src, i, "import") {
			// 6 == len("import")
			i += 6
			for i < len(src) && (src[i] == ' ' || src[i] == '\t') {
				i++
			}
			if i < len(src) && src[i] == '(' {
				// Grouped import: skip through the matching ')'.
				depth := 0
				for i < len(src) {
					if src[i] == '(' {
						depth++
					} else if src[i] == ')' {
						depth--
						if depth == 0 {
							i++
							break
						}
					}
					i++
				}
			} else {
				// Single import: skip the rest of the line.
				for i < len(src) && src[i] != '\n' {
					i++
				}
			}
			continue
		}

		// This is a declaration (func, type, var, const), a comment line, or
		// some other line.
		start := i
		if hasWordAt(src, i, "func") || hasWordAt(src, i, "type") || hasWordAt(src, i, "var") || hasWordAt(src, i, "const") {
			i = findDeclEnd(src, i)
			decls = append(decls, src[start:i])
		} else if src[i] == '/' && i+1 < len(src) && src[i+1] == '/' {
			// Line comment directly above other text: consume just this
			// line. Nothing is appended here, so the comment text does not
			// become part of the declaration found on a later iteration.
			for i < len(src) && src[i] != '\n' {
				i++
			}
			if i < len(src) {
				i++
			}
		} else {
			// Skip unknown line
			for i < len(src) && src[i] != '\n' {
				i++
			}
			if i < len(src) {
				i++
			}
		}
	}
	return decls
}
 114  
 115  func hasWordAt(src []byte, pos int, word string) bool {
 116  	if pos+len(word) > len(src) {
 117  		return false
 118  	}
 119  	if string(src[pos:pos+len(word)]) != word {
 120  		return false
 121  	}
 122  	after := pos + len(word)
 123  	if after < len(src) && isIdent(src[after]) {
 124  		return false
 125  	}
 126  	return true
 127  }
 128  
 129  func findDeclEnd(src []byte, start int) int {
 130  	i := start
 131  	// Skip to first { or end of line (for single-line decls)
 132  	braceDepth := 0
 133  	parenDepth := 0
 134  	inString := false
 135  	var strQuote byte
 136  
 137  	for i < len(src) {
 138  		b := src[i]
 139  
 140  		if inString {
 141  			if b == '\\' && strQuote != '`' {
 142  				i += 2
 143  				continue
 144  			}
 145  			if b == strQuote {
 146  				inString = false
 147  			}
 148  			i++
 149  			continue
 150  		}
 151  
 152  		if b == '/' && i+1 < len(src) && src[i+1] == '/' {
 153  			for i < len(src) && src[i] != '\n' {
 154  				i++
 155  			}
 156  			continue
 157  		}
 158  		if b == '/' && i+1 < len(src) && src[i+1] == '*' {
 159  			i += 2
 160  			for i+1 < len(src) {
 161  				if src[i] == '*' && src[i+1] == '/' {
 162  					i += 2
 163  					break
 164  				}
 165  				i++
 166  			}
 167  			continue
 168  		}
 169  
 170  		if b == '"' || b == '\'' || b == '`' {
 171  			inString = true
 172  			strQuote = b
 173  			i++
 174  			continue
 175  		}
 176  
 177  		if b == '(' {
 178  			parenDepth++
 179  		} else if b == ')' {
 180  			parenDepth--
 181  		} else if b == '{' {
 182  			braceDepth++
 183  		} else if b == '}' {
 184  			braceDepth--
 185  			if braceDepth == 0 && parenDepth == 0 {
 186  				i++
 187  				return i
 188  			}
 189  		} else if b == '\n' && braceDepth == 0 && parenDepth == 0 {
 190  			// Single-line decl (like var x int)
 191  			// But check if next line continues (for grouped const/var)
 192  			if i+1 < len(src) {
 193  				next := i + 1
 194  				for next < len(src) && (src[next] == ' ' || src[next] == '\t' || src[next] == '\r') {
 195  					next++
 196  				}
 197  				if next < len(src) && src[next] != '\n' {
 198  					// Check if this looks like it starts a new top-level decl
 199  					if hasWordAt(src, next, "func") || hasWordAt(src, next, "type") || hasWordAt(src, next, "var") || hasWordAt(src, next, "const") || (src[next] == '/' && next+1 < len(src) && src[next+1] == '/') {
 200  						return i
 201  					}
 202  				}
 203  			}
 204  			// For now, single-line decl ends at newline
 205  			if braceDepth == 0 && parenDepth == 0 && i > start+4 {
 206  				return i
 207  			}
 208  		}
 209  		i++
 210  	}
 211  	return i
 212  }
 213  
 214  // DeclName extracts the name from a declaration.
 215  func DeclName(decl []byte) string {
 216  	g := &astGen{src: decl, pos: 0}
 217  	g.skipSpaceAndNewlines()
 218  
 219  	// Skip doc comments
 220  	for g.pos < len(g.src) && g.src[g.pos] == '/' {
 221  		for g.pos < len(g.src) && g.src[g.pos] != '\n' {
 222  			g.pos++
 223  		}
 224  		g.skipSpaceAndNewlines()
 225  	}
 226  
 227  	if g.matchWord("func") {
 228  		g.skipSpace()
 229  		if g.peek() == '(' {
 230  			// Method: func (recv Type) Name(...)
 231  			g.skipBalanced('(', ')')
 232  			g.skipSpace()
 233  		}
 234  		return g.readIdent()
 235  	}
 236  	if g.matchWord("type") {
 237  		g.skipSpace()
 238  		return g.readIdent()
 239  	}
 240  	if g.matchWord("var") || g.matchWord("const") {
 241  		g.skipSpace()
 242  		if g.peek() == '(' {
 243  			// Grouped - return first name
 244  			g.pos++
 245  			g.skipSpaceAndNewlines()
 246  			return g.readIdent()
 247  		}
 248  		return g.readIdent()
 249  	}
 250  	return ""
 251  }
 252  
 253  // GenAST takes the source of a single declaration and produces an AST dump.
 254  func GenAST(src []byte) []byte {
 255  	g := &astGen{src: src, out: []byte{:0:len(src)}, maxDepth: 8}
 256  	g.pos = 0
 257  	g.skipSpace()
 258  
 259  	if g.matchWord("func") {
 260  		g.genFuncDecl()
 261  	} else if g.matchWord("type") {
 262  		g.genTypeDecl()
 263  	} else if g.matchWord("var") {
 264  		g.genVarDecl()
 265  	} else if g.matchWord("const") {
 266  		g.genConstDecl()
 267  	}
 268  
 269  	return g.out
 270  }
 271  
// astGen holds the scanner position and the output buffer used while turning
// one source slice into an indented text dump.
type astGen struct {
	src      []byte // input text being scanned
	pos      int    // current byte offset into src
	out      []byte // dump text accumulated so far
	maxDepth int    // genBlock skips any block nested deeper than this
}
 278  
 279  // --- output helpers ---
 280  
 281  func (g *astGen) indent(depth int) {
 282  	for i := 0; i < depth; i++ {
 283  		g.out = append(g.out, ' ', ' ')
 284  	}
 285  }
 286  
 287  func (g *astGen) line(depth int, s string) {
 288  	g.indent(depth)
 289  	g.out = append(g.out, s...)
 290  	g.out = append(g.out, '\n')
 291  }
 292  
 293  func (g *astGen) lineRefs(depth int, prefix string, refs []string) {
 294  	g.indent(depth)
 295  	g.out = append(g.out, prefix...)
 296  	if len(refs) > 0 {
 297  		g.out = append(g.out, " ["...)
 298  		for i, r := range refs {
 299  			if i > 0 {
 300  				g.out = append(g.out, ',')
 301  			}
 302  			g.out = append(g.out, r...)
 303  		}
 304  		g.out = append(g.out, ']')
 305  	}
 306  	g.out = append(g.out, '\n')
 307  }
 308  
 309  // --- tokenizer helpers ---
 310  
 311  func (g *astGen) eof() bool { return g.pos >= len(g.src) }
 312  
 313  func (g *astGen) peek() byte {
 314  	if g.eof() {
 315  		return 0
 316  	}
 317  	return g.src[g.pos]
 318  }
 319  
 320  func (g *astGen) next() byte {
 321  	b := g.src[g.pos]
 322  	g.pos++
 323  	return b
 324  }
 325  
 326  func (g *astGen) skipSpace() {
 327  	for g.pos < len(g.src) {
 328  		b := g.src[g.pos]
 329  		if b == ' ' || b == '\t' || b == '\r' {
 330  			g.pos++
 331  		} else if b == '/' && g.pos+1 < len(g.src) && g.src[g.pos+1] == '/' {
 332  			for g.pos < len(g.src) && g.src[g.pos] != '\n' {
 333  				g.pos++
 334  			}
 335  		} else if b == '/' && g.pos+1 < len(g.src) && g.src[g.pos+1] == '*' {
 336  			g.pos += 2
 337  			for g.pos+1 < len(g.src) {
 338  				if g.src[g.pos] == '*' && g.src[g.pos+1] == '/' {
 339  					g.pos += 2
 340  					break
 341  				}
 342  				g.pos++
 343  			}
 344  		} else {
 345  			break
 346  		}
 347  	}
 348  }
 349  
 350  func (g *astGen) skipSpaceAndNewlines() {
 351  	for g.pos < len(g.src) {
 352  		b := g.src[g.pos]
 353  		if b == ' ' || b == '\t' || b == '\r' || b == '\n' {
 354  			g.pos++
 355  		} else if b == '/' && g.pos+1 < len(g.src) && g.src[g.pos+1] == '/' {
 356  			for g.pos < len(g.src) && g.src[g.pos] != '\n' {
 357  				g.pos++
 358  			}
 359  		} else if b == '/' && g.pos+1 < len(g.src) && g.src[g.pos+1] == '*' {
 360  			g.pos += 2
 361  			for g.pos+1 < len(g.src) {
 362  				if g.src[g.pos] == '*' && g.src[g.pos+1] == '/' {
 363  					g.pos += 2
 364  					break
 365  				}
 366  				g.pos++
 367  			}
 368  		} else {
 369  			break
 370  		}
 371  	}
 372  }
 373  
 374  func (g *astGen) matchWord(w string) bool {
 375  	g.skipSpaceAndNewlines()
 376  	if g.pos+len(w) > len(g.src) {
 377  		return false
 378  	}
 379  	if string(g.src[g.pos:g.pos+len(w)]) != w {
 380  		return false
 381  	}
 382  	after := g.pos + len(w)
 383  	if after < len(g.src) && isIdent(g.src[after]) {
 384  		return false
 385  	}
 386  	g.pos = after
 387  	return true
 388  }
 389  
 390  func (g *astGen) readIdent() string {
 391  	g.skipSpace()
 392  	start := g.pos
 393  	for g.pos < len(g.src) && isIdent(g.src[g.pos]) {
 394  		g.pos++
 395  	}
 396  	if g.pos == start {
 397  		return ""
 398  	}
 399  	return string(g.src[start:g.pos])
 400  }
 401  
 402  func isIdent(b byte) bool {
 403  	return (b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z') || (b >= '0' && b <= '9') || b == '_'
 404  }
 405  
 406  func isIdentStart(b byte) bool {
 407  	return (b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z') || b == '_'
 408  }
 409  
// readTypeExpr reads a type expression like []byte, *RuneIter, map[K]V, etc.
//
// It consumes the type text at the current position and returns a normalised
// string for it. Pointer, slice, array, variadic, map and chan forms recurse
// on their element type. Function types collapse to "func(...)", struct
// bodies to "struct{...}" and interface bodies to "interface{}". It returns
// "" at end of input or when no type can be read.
func (g *astGen) readTypeExpr() string {
	g.skipSpace()
	if g.eof() {
		return ""
	}
	b := g.peek()

	// Pointer type.
	if b == '*' {
		g.pos++
		return "*" | g.readTypeExpr()
	}
	if b == '[' {
		g.pos++
		g.skipSpace()
		// Slice type.
		if g.peek() == ']' {
			g.pos++
			return "[]" | g.readTypeExpr()
		}
		// array type [N]T
		lenExpr := g.readExprStr()
		g.expect(']')
		return "[" | lenExpr | "]" | g.readTypeExpr()
	}
	// Variadic parameter type: three consecutive dots.
	if b == '.' && g.pos+2 < len(g.src) && g.src[g.pos+1] == '.' && g.src[g.pos+2] == '.' {
		g.pos += 3
		return "..." | g.readTypeExpr()
	}

	if g.prefixMatch("map[") {
		// 4 == len("map[")
		g.pos += 4
		key := g.readTypeExpr()
		g.expect(']')
		val := g.readTypeExpr()
		return "map[" | key | "]" | val
	}
	if g.prefixMatch("chan ") {
		// Step over "chan"; the following space is skipped by the recursive call.
		g.pos += 4
		return "chan " | g.readTypeExpr()
	}
	if g.prefixMatch("func(") || g.prefixMatch("func (") {
		// Step over "func", then the parameter list.
		g.pos += 4
		g.skipSpace()
		g.skipBalanced('(', ')')
		g.skipSpace()
		// Skip optional return type(s)
		if g.peek() == '(' {
			g.skipBalanced('(', ')')
		} else if !g.eof() && g.peek() != ',' && g.peek() != ')' && g.peek() != '{' && g.peek() != '\n' {
			g.readTypeExpr()
		}
		return "func(...)"
	}
	if g.prefixMatch("interface{") {
		// Step over "interface" so the position is on '{'. The body is
		// discarded; the result is "interface{}" whatever it contained.
		// NOTE(review): "interface {" with a space is not matched here and
		// falls through to the plain-identifier path below.
		g.pos += 9
		g.skipBalanced('{', '}')
		return "interface{}"
	}
	if g.prefixMatch("struct{") {
		// Step over "struct" so the position is on '{'.
		g.pos += 6
		g.skipBalanced('{', '}')
		return "struct{...}"
	}
	if g.prefixMatch("struct {") {
		// Step over "struct " so the position is on '{'.
		g.pos += 7
		g.skipBalanced('{', '}')
		return "struct{...}"
	}

	// Plain or package-qualified type name.
	name := g.readIdent()
	if name == "" {
		return ""
	}
	// Check for pkg.Type
	g.skipSpace()
	if g.peek() == '.' {
		g.pos++
		sel := g.readIdent()
		return name | "." | sel
	}
	return name
}
 492  
 493  func (g *astGen) readExprStr() string {
 494  	g.skipSpace()
 495  	start := g.pos
 496  	depth := 0
 497  	for g.pos < len(g.src) {
 498  		b := g.src[g.pos]
 499  		if b == '(' || b == '[' || b == '{' {
 500  			depth++
 501  		} else if b == ')' || b == ']' || b == '}' {
 502  			if depth == 0 {
 503  				break
 504  			}
 505  			depth--
 506  		} else if (b == ',' || b == '\n') && depth == 0 {
 507  			break
 508  		}
 509  		g.pos++
 510  	}
 511  	return string(bytes.TrimSpace(g.src[start:g.pos]))
 512  }
 513  
 514  func (g *astGen) expect(b byte) {
 515  	g.skipSpace()
 516  	if g.pos < len(g.src) && g.src[g.pos] == b {
 517  		g.pos++
 518  	}
 519  }
 520  
 521  func (g *astGen) prefixMatch(s string) bool {
 522  	if g.pos+len(s) > len(g.src) {
 523  		return false
 524  	}
 525  	return string(g.src[g.pos:g.pos+len(s)]) == s
 526  }
 527  
 528  func (g *astGen) skipBalanced(open, close byte) {
 529  	if g.peek() != open {
 530  		return
 531  	}
 532  	depth := 0
 533  	for g.pos < len(g.src) {
 534  		b := g.src[g.pos]
 535  		if b == '\'' || b == '"' || b == '`' {
 536  			g.skipStringLit(b)
 537  			continue
 538  		}
 539  		if b == open {
 540  			depth++
 541  		} else if b == close {
 542  			depth--
 543  			if depth == 0 {
 544  				g.pos++
 545  				return
 546  			}
 547  		}
 548  		g.pos++
 549  	}
 550  }
 551  
 552  // --- declaration generators ---
 553  
 554  func (g *astGen) genFuncDecl() {
 555  	g.skipSpace()
 556  	// Check for receiver: func (recv Type) Name(...)
 557  	recv := ""
 558  	if g.peek() == '(' {
 559  		saved := g.pos
 560  		g.pos++
 561  		g.skipSpace()
 562  		// Try to read receiver
 563  		name := g.readIdent()
 564  		_ = name
 565  		g.skipSpace()
 566  		recvType := g.readTypeExpr()
 567  		g.skipSpace()
 568  		if g.peek() == ')' {
 569  			g.pos++
 570  			recv = recvType
 571  		} else {
 572  			g.pos = saved
 573  		}
 574  	}
 575  
 576  	g.skipSpace()
 577  	funcName := g.readIdent()
 578  
 579  	header := "FuncDecl " | funcName
 580  	if recv != "" {
 581  		header = header | " recv=" | recv
 582  	}
 583  	g.line(0, header)
 584  
 585  	// Parse params
 586  	g.skipSpace()
 587  	if g.peek() == '(' {
 588  		params := g.parseFieldList(false)
 589  		if len(params) > 0 {
 590  			g.line(1, "Params")
 591  			for _, f := range params {
 592  				g.line(2, f)
 593  			}
 594  		}
 595  	}
 596  
 597  	// Parse results
 598  	g.skipSpace()
 599  	if g.peek() == '(' {
 600  		results := g.parseFieldList(true)
 601  		if len(results) > 0 {
 602  			g.line(1, "Results")
 603  			for _, f := range results {
 604  				g.line(2, f)
 605  			}
 606  		}
 607  	} else if g.peek() != '{' && !g.eof() {
 608  		// Single unnamed result
 609  		typeName := g.readTypeExpr()
 610  		if typeName != "" {
 611  			g.line(1, "Results")
 612  			g.line(2, typeName)
 613  		}
 614  	}
 615  
 616  	// Parse body
 617  	g.skipSpaceAndNewlines()
 618  	if g.peek() == '{' {
 619  		g.genBlock(1)
 620  	}
 621  }
 622  
// parseFieldList parses a parenthesised parameter or result list starting at
// the opening '(' and returns one string per field.
//
// With typesOnly set, every entry is read as a bare type. Otherwise each
// entry is read as "name[,name...] type" and reported as "a,b T"; entries
// that turn out to be types without names are reported as the type alone.
// Parsing stops after the closing ')' or at end of input.
func (g *astGen) parseFieldList(typesOnly bool) []string {
	g.expect('(')
	var fields []string
	for {
		g.skipSpaceAndNewlines()
		if g.peek() == ')' {
			g.pos++
			break
		}
		if g.eof() {
			break
		}

		if typesOnly {
			typeName := g.readTypeExpr()
			if typeName != "" {
				fields = append(fields, typeName)
			}
			g.skipSpace()
			if g.peek() == ',' {
				g.pos++
			}
			continue
		}

		// Try: name[,name...] type  or just type
		saved := g.pos
		var names []string
		for {
			name := g.readIdent()
			if name == "" {
				break
			}
			names = append(names, name)
			g.skipSpace()
			if g.peek() == ',' {
				g.pos++
				g.skipSpace()
			} else {
				break
			}
		}

		g.skipSpace()
		p := g.peek()
		// Single dot after a name = qualified type (pkg.Type), not a param name.
		// Three dots = variadic (...type), treat as "name type" where the type starts with ...
		if len(names) == 1 && p == '.' && !(g.pos+2 < len(g.src) && g.src[g.pos+1] == '.' && g.src[g.pos+2] == '.') {
			// Rewind and re-read the whole entry as a type.
			g.pos = saved
			typeName := g.readTypeExpr()
			if typeName != "" {
				fields = append(fields, typeName)
			}
		} else if len(names) > 0 && (isIdentStart(p) || p == '*' || p == '[' || p == '.' || p == 'm' || p == 'c' || p == 'f' || p == 'i' || p == 's') {
			// A type follows the names. The single-letter checks at the end
			// of the condition are already covered by isIdentStart.
			typeName := g.readTypeExpr()
			if typeName != "" {
				nameStr := ""
				for i, n := range names {
					if i > 0 {
						nameStr = nameStr | ","
					}
					nameStr = nameStr | n
				}
				fields = append(fields, nameStr | " " | typeName)
			} else {
				// No type could be read: report each name on its own.
				for _, n := range names {
					fields = append(fields, n)
				}
			}
		} else if len(names) > 0 {
			// Names with nothing type-like after them: each identifier is
			// reported on its own.
			for _, n := range names {
				fields = append(fields, n)
			}
		} else {
			// The entry does not start with an identifier (for example *T or
			// []T): rewind and read it as a type.
			g.pos = saved
			typeName := g.readTypeExpr()
			if typeName != "" {
				fields = append(fields, typeName)
			}
		}

		g.skipSpace()
		if g.peek() == ',' {
			g.pos++
		}
	}
	return fields
}
 711  
 712  func (g *astGen) genTypeDecl() {
 713  	g.line(0, "GenDecl type")
 714  	g.skipSpaceAndNewlines()
 715  	if g.peek() == '(' {
 716  		// Grouped type declaration
 717  		g.pos++
 718  		for {
 719  			g.skipSpaceAndNewlines()
 720  			if g.peek() == ')' {
 721  				g.pos++
 722  				break
 723  			}
 724  			if g.eof() {
 725  				break
 726  			}
 727  			g.genTypeSpec(1)
 728  		}
 729  	} else {
 730  		g.genTypeSpec(1)
 731  	}
 732  }
 733  
 734  func (g *astGen) genTypeSpec(depth int) {
 735  	name := g.readIdent()
 736  	if name == "" {
 737  		return
 738  	}
 739  	g.skipSpace()
 740  
 741  	// Check if it's a struct - handle specially to preserve field info
 742  	if g.prefixMatch("struct") {
 743  		g.pos += 6
 744  		g.skipSpaceAndNewlines()
 745  		if g.peek() == '{' {
 746  			g.line(depth, "Type " | name | " struct{...}")
 747  			// Parse struct fields from the body
 748  			g.pos++ // skip {
 749  			for {
 750  				g.skipSpaceAndNewlines()
 751  				if g.peek() == '}' {
 752  					g.pos++
 753  					break
 754  				}
 755  				if g.eof() {
 756  					break
 757  				}
 758  				fg := &astGen{src: g.src, pos: g.pos}
 759  				fieldName := fg.readIdent()
 760  				if fieldName == "" {
 761  					// Skip line
 762  					for g.pos < len(g.src) && g.src[g.pos] != '\n' {
 763  						g.pos++
 764  					}
 765  					continue
 766  				}
 767  				fg.skipSpace()
 768  				fieldType := fg.readTypeExpr()
 769  				g.pos = fg.pos
 770  				if fieldType != "" {
 771  					g.line(depth+1, fieldName | " " | fieldType)
 772  				} else {
 773  					g.line(depth+1, fieldName)
 774  				}
 775  				// Skip to end of line (tags, comments)
 776  				for g.pos < len(g.src) && g.src[g.pos] != '\n' {
 777  					g.pos++
 778  				}
 779  			}
 780  			return
 781  		}
 782  	}
 783  
 784  	typeExpr := g.readTypeExpr()
 785  	g.line(depth, "Type " | name | " " | typeExpr)
 786  }
 787  
 788  func (g *astGen) genVarDecl() {
 789  	g.line(0, "GenDecl var")
 790  	g.skipSpaceAndNewlines()
 791  	if g.peek() == '(' {
 792  		g.pos++
 793  		for {
 794  			g.skipSpaceAndNewlines()
 795  			if g.peek() == ')' {
 796  				g.pos++
 797  				break
 798  			}
 799  			if g.eof() {
 800  				break
 801  			}
 802  			g.genValueSpec(1)
 803  		}
 804  	} else {
 805  		g.genValueSpec(1)
 806  	}
 807  }
 808  
 809  func (g *astGen) genConstDecl() {
 810  	g.line(0, "GenDecl const")
 811  	g.skipSpaceAndNewlines()
 812  	if g.peek() == '(' {
 813  		g.pos++
 814  		for {
 815  			g.skipSpaceAndNewlines()
 816  			if g.peek() == ')' {
 817  				g.pos++
 818  				break
 819  			}
 820  			if g.eof() {
 821  				break
 822  			}
 823  			g.genValueSpec(1)
 824  		}
 825  	} else {
 826  		g.genValueSpec(1)
 827  	}
 828  }
 829  
 830  func (g *astGen) genValueSpec(depth int) {
 831  	var names []string
 832  	for {
 833  		name := g.readIdent()
 834  		if name == "" {
 835  			break
 836  		}
 837  		names = append(names, name)
 838  		g.skipSpace()
 839  		if g.peek() == ',' {
 840  			g.pos++
 841  			g.skipSpace()
 842  		} else {
 843  			break
 844  		}
 845  	}
 846  	if len(names) == 0 {
 847  		// Skip to next line
 848  		for g.pos < len(g.src) && g.src[g.pos] != '\n' {
 849  			g.pos++
 850  		}
 851  		return
 852  	}
 853  
 854  	nameStr := ""
 855  	for i, n := range names {
 856  		if i > 0 {
 857  			nameStr = nameStr | ","
 858  		}
 859  		nameStr = nameStr | n
 860  	}
 861  
 862  	// Check for type
 863  	g.skipSpace()
 864  	typ := ""
 865  	p := g.peek()
 866  	if isIdentStart(p) || p == '*' || p == '[' {
 867  		// Could be type or = sign
 868  		saved := g.pos
 869  		if p != '=' {
 870  			tryType := g.readTypeExpr()
 871  			g.skipSpace()
 872  			if g.peek() == '=' || g.peek() == '\n' || g.eof() || g.peek() == ')' {
 873  				typ = tryType
 874  			} else {
 875  				g.pos = saved
 876  			}
 877  		}
 878  	}
 879  
 880  	if typ != "" {
 881  		g.line(depth, "Value " | nameStr | " " | typ)
 882  	} else {
 883  		g.line(depth, "Value " | nameStr)
 884  	}
 885  
 886  	// Skip rest of line (value expression), respecting nested parens
 887  	parenD := 0
 888  	for g.pos < len(g.src) {
 889  		b := g.src[g.pos]
 890  		if b == '(' {
 891  			parenD++
 892  		} else if b == ')' {
 893  			if parenD == 0 {
 894  				break
 895  			}
 896  			parenD--
 897  		} else if b == '\n' && parenD == 0 {
 898  			break
 899  		}
 900  		g.pos++
 901  	}
 902  }
 903  
 904  // --- block and statement generators ---
 905  
 906  func (g *astGen) genBlock(depth int) {
 907  	if depth > g.maxDepth {
 908  		g.skipBalanced('{', '}')
 909  		return
 910  	}
 911  	g.line(depth, "Block")
 912  	g.expect('{')
 913  	iters := 0
 914  	for {
 915  		g.skipSpaceAndNewlines()
 916  		if g.peek() == '}' {
 917  			g.pos++
 918  			return
 919  		}
 920  		if g.eof() {
 921  			return
 922  		}
 923  		iters++
 924  		if iters > 500 {
 925  			g.skipBalanced('{', '}')
 926  			return
 927  		}
 928  		saved := g.pos
 929  		g.genStmt(depth + 1)
 930  		if g.pos == saved {
 931  			g.skipToStmtEnd()
 932  			if g.pos == saved {
 933  				g.pos++
 934  			}
 935  		}
 936  	}
 937  }
 938  
 939  func (g *astGen) genStmt(depth int) {
 940  	g.skipSpaceAndNewlines()
 941  	if g.eof() || g.peek() == '}' {
 942  		return
 943  	}
 944  
 945  	// Try keywords first
 946  	if g.matchWord("return") {
 947  		g.genReturn(depth)
 948  	} else if g.matchWord("if") {
 949  		g.genIf(depth)
 950  	} else if g.matchWord("for") {
 951  		g.genFor(depth)
 952  	} else if g.matchWord("switch") {
 953  		g.genSwitch(depth)
 954  	} else if g.matchWord("select") {
 955  		g.genSelect(depth)
 956  	} else if g.matchWord("case") {
 957  		g.genCase(depth)
 958  	} else if g.matchWord("default") {
 959  		g.genDefault(depth)
 960  	} else if g.matchWord("break") {
 961  		g.line(depth, "break")
 962  		g.skipToStmtEnd()
 963  	} else if g.matchWord("continue") {
 964  		g.line(depth, "continue")
 965  		g.skipToStmtEnd()
 966  	} else if g.matchWord("defer") {
 967  		g.genDefer(depth)
 968  	} else if g.matchWord("var") {
 969  		g.genVarStmt(depth)
 970  	} else if g.matchWord("type") {
 971  		g.genTypeStmt(depth)
 972  	} else if g.matchWord("const") {
 973  		g.genConstStmt(depth)
 974  	} else if g.matchWord("go") {
 975  		g.genGoStmt(depth)
 976  	} else {
 977  		// Assignment or expression statement
 978  		g.genAssignOrExpr(depth)
 979  	}
 980  }
 981  
 982  func (g *astGen) genReturn(depth int) {
 983  	g.skipSpace()
 984  	if g.peek() == '\n' || g.peek() == '}' || g.eof() {
 985  		g.line(depth, "Return")
 986  		return
 987  	}
 988  	// Collect refs from return expressions
 989  	stmtText := g.readToStmtEnd()
 990  	refs := collectRefsFromText(stmtText)
 991  	g.lineRefs(depth, "Return", refs)
 992  }
 993  
 994  func (g *astGen) genIf(depth int) {
 995  	// Read the condition (everything up to the opening {)
 996  	condText := g.readToBlockOpen()
 997  	refs := collectRefsFromText(condText)
 998  	g.lineRefs(depth, "If", refs)
 999  
1000  	g.skipSpaceAndNewlines()
1001  	if g.peek() == '{' {
1002  		g.genBlock(depth + 1)
1003  	}
1004  
1005  	// Check for else
1006  	g.skipSpaceAndNewlines()
1007  	if g.matchWord("else") {
1008  		g.skipSpaceAndNewlines()
1009  		if g.matchWord("if") {
1010  			g.line(depth, "Else")
1011  			g.genIf(depth + 1)
1012  		} else {
1013  			g.line(depth, "Else")
1014  			if g.peek() == '{' {
1015  				g.genBlock(depth + 1)
1016  			}
1017  		}
1018  	}
1019  }
1020  
1021  func (g *astGen) genFor(depth int) {
1022  	g.skipSpace()
1023  	if g.peek() == '{' {
1024  		g.line(depth, "For")
1025  		g.genBlock(depth + 1)
1026  		return
1027  	}
1028  
1029  	// Check for range
1030  	saved := g.pos
1031  	condText := g.readToBlockOpen()
1032  	if bytes.Contains(condText, []byte("range ")) || bytes.HasPrefix(bytes.TrimSpace(condText), []byte("range ")) {
1033  		rangeExpr := extractRangeExpr(condText)
1034  		rangeIdx := bytes.Index(condText, []byte("range "))
1035  		var refs []string
1036  		if rangeIdx >= 0 {
1037  			afterRange := condText[rangeIdx+6:]
1038  			refs = append(refs, collectRefsFromText(afterRange)...)
1039  			beforeRange := condText[:rangeIdx]
1040  			refs = append(refs, collectRefsFromText(beforeRange)...)
1041  		} else {
1042  			refs = collectRefsFromText(condText)
1043  		}
1044  		if rangeExpr != "" {
1045  			g.lineRefs(depth, "Range " | rangeExpr, refs)
1046  		} else {
1047  			g.lineRefs(depth, "Range", refs)
1048  		}
1049  		g.skipSpaceAndNewlines()
1050  		if g.peek() == '{' {
1051  			g.genBlock(depth + 1)
1052  		}
1053  		return
1054  	}
1055  
1056  	// Regular for loop
1057  	_ = saved
1058  	refs := collectRefsFromText(condText)
1059  	g.lineRefs(depth, "For", refs)
1060  
1061  	g.skipSpaceAndNewlines()
1062  	if g.peek() == '{' {
1063  		g.genBlock(depth + 1)
1064  	}
1065  }
1066  
1067  func (g *astGen) genSwitch(depth int) {
1068  	g.skipSpace()
1069  	if g.peek() == '{' {
1070  		g.line(depth, "Switch")
1071  		g.genBlock(depth + 1)
1072  		return
1073  	}
1074  
1075  	condText := g.readToBlockOpen()
1076  	refs := collectRefsFromText(condText)
1077  	g.lineRefs(depth, "Switch", refs)
1078  
1079  	g.skipSpaceAndNewlines()
1080  	if g.peek() == '{' {
1081  		g.genBlock(depth + 1)
1082  	}
1083  }
1084  
1085  func (g *astGen) genSelect(depth int) {
1086  	g.line(depth, "Select")
1087  	g.skipSpaceAndNewlines()
1088  	if g.peek() == '{' {
1089  		g.genBlock(depth + 1)
1090  	}
1091  }
1092  
1093  func (g *astGen) genCase(depth int) {
1094  	g.skipSpace()
1095  	caseText := g.readToCaseEnd()
1096  	refs := collectRefsFromText(caseText)
1097  	g.lineRefs(depth, "Case", refs)
1098  
1099  	for {
1100  		g.skipSpaceAndNewlines()
1101  		p := g.peek()
1102  		if p == '}' || g.eof() {
1103  			break
1104  		}
1105  		saved := g.pos
1106  		if g.matchWord("case") || g.matchWord("default") {
1107  			g.pos = saved
1108  			break
1109  		}
1110  		g.pos = saved
1111  		g.genStmt(depth + 1)
1112  		if g.pos == saved {
1113  			g.skipToStmtEnd()
1114  			if g.pos == saved {
1115  				g.pos++
1116  			}
1117  		}
1118  	}
1119  }
1120  
1121  func (g *astGen) genDefault(depth int) {
1122  	g.line(depth, "Default")
1123  	g.skipSpace()
1124  	if g.peek() == ':' {
1125  		g.pos++
1126  	}
1127  	for {
1128  		g.skipSpaceAndNewlines()
1129  		p := g.peek()
1130  		if p == '}' || g.eof() {
1131  			break
1132  		}
1133  		saved := g.pos
1134  		if g.matchWord("case") || g.matchWord("default") {
1135  			g.pos = saved
1136  			break
1137  		}
1138  		g.pos = saved
1139  		g.genStmt(depth + 1)
1140  		if g.pos == saved {
1141  			g.skipToStmtEnd()
1142  			if g.pos == saved {
1143  				g.pos++
1144  			}
1145  		}
1146  	}
1147  }
1148  
1149  func (g *astGen) genDefer(depth int) {
1150  	g.skipSpace()
1151  	stmtText := g.readToStmtEnd()
1152  	// Extract function name from call
1153  	funcName := ""
1154  	parenIdx := bytes.IndexByte(stmtText, '(')
1155  	if parenIdx > 0 {
1156  		funcName = string(bytes.TrimSpace(stmtText[:parenIdx]))
1157  	}
1158  	refs := collectRefsFromText(stmtText)
1159  	if funcName != "" {
1160  		g.lineRefs(depth, "Defer " | funcName, refs)
1161  	} else {
1162  		g.lineRefs(depth, "Defer", refs)
1163  	}
1164  }
1165  
1166  func (g *astGen) genGoStmt(depth int) {
1167  	g.skipSpace()
1168  	stmtText := g.readToStmtEnd()
1169  	funcName := ""
1170  	parenIdx := bytes.IndexByte(stmtText, '(')
1171  	if parenIdx > 0 {
1172  		funcName = string(bytes.TrimSpace(stmtText[:parenIdx]))
1173  	}
1174  	refs := collectRefsFromText(stmtText)
1175  	if funcName != "" {
1176  		g.lineRefs(depth, "Go " | funcName, refs)
1177  	} else {
1178  		g.lineRefs(depth, "Go", refs)
1179  	}
1180  }
1181  
1182  func (g *astGen) genVarStmt(depth int) {
1183  	g.line(depth, "GenDecl var")
1184  	g.skipSpaceAndNewlines()
1185  	if g.peek() == '(' {
1186  		g.pos++
1187  		for {
1188  			g.skipSpaceAndNewlines()
1189  			if g.peek() == ')' {
1190  				g.pos++
1191  				break
1192  			}
1193  			if g.eof() {
1194  				break
1195  			}
1196  			g.genValueSpec(depth + 1)
1197  		}
1198  	} else {
1199  		g.genValueSpec(depth + 1)
1200  	}
1201  }
1202  
1203  func (g *astGen) genTypeStmt(depth int) {
1204  	g.line(depth, "GenDecl type")
1205  	g.skipSpaceAndNewlines()
1206  	g.genTypeSpec(depth + 1)
1207  }
1208  
1209  func (g *astGen) genConstStmt(depth int) {
1210  	g.line(depth, "GenDecl const")
1211  	g.skipSpaceAndNewlines()
1212  	if g.peek() == '(' {
1213  		g.pos++
1214  		for {
1215  			g.skipSpaceAndNewlines()
1216  			if g.peek() == ')' {
1217  				g.pos++
1218  				break
1219  			}
1220  			if g.eof() {
1221  				break
1222  			}
1223  			g.genValueSpec(depth + 1)
1224  		}
1225  	} else {
1226  		g.genValueSpec(depth + 1)
1227  	}
1228  }
1229  
// genAssignOrExpr reads one statement from the cursor and emits a single
// dump line for it, classified in this order:
//
//   - assignment:  "Assign <lhs> <op>"
//   - inc/dec:     "<expr> ++" or "<expr> --"
//   - anything else is an expression statement: "Expr <text>", with the
//     arguments of the first call collapsed by normalizeCallArgs.
//
// Identifier references collected from the statement text are attached to
// the emitted line.
func (g *astGen) genAssignOrExpr(depth int) {
	stmtText := g.readToStmtEnd()

	// Assignment (plain, define, or compound).
	assignOp := findAssignOp(stmtText)
	if assignOp != "" {
		lhs := extractLHS(stmtText, assignOp)
		refs := collectRefsWithFuncLit(stmtText, assignOp)
		g.lineRefs(depth, "Assign " | lhs | " " | assignOp, refs)
		return
	}

	trimmed := bytes.TrimSpace(stmtText)

	// Increment / decrement: the statement ends in "++" or "--".
	if len(trimmed) >= 2 {
		suffix := string(trimmed[len(trimmed)-2:])
		if suffix == "++" || suffix == "--" {
			exprPart := string(bytes.TrimSpace(trimmed[:len(trimmed)-2]))
			refs := collectRefsFromText(stmtText)
			g.lineRefs(depth, exprPart | " " | suffix, refs)
			return
		}
	}

	// Expression statement.
	refs := collectRefsFromText(stmtText)
	exprName := normalizeCallArgs(string(trimmed))
	g.lineRefs(depth, "Expr " | exprName, refs)
}
1262  
// normalizeCallArgs replaces the argument list of the first call in s with
// "(...)", keeping whatever follows the matching ')'. For example
// "a(b).c(d)" becomes "a(...).c(d)".
//
// Parentheses inside string, rune and raw-string literals are ignored, so
// arguments such as '(' or ")" do not disturb the matching. s is returned
// unchanged when it has no '(' outside a literal or when the first '(' is
// never closed.
func normalizeCallArgs(s string) string {
	open := -1 // index of the first '(' outside any literal
	depth := 0
	i := 0
	for i < len(s) {
		c := s[i]
		if c == '"' || c == '\'' || c == '`' {
			// Skip the literal; backslash escapes only apply to " and '.
			i++
			for i < len(s) && s[i] != c {
				if c != '`' && s[i] == '\\' {
					i++
				}
				i++
			}
			i++ // closing quote (harmless if the literal is unterminated)
			continue
		}
		if c == '(' {
			if open < 0 {
				open = i
			}
			depth++
		} else if c == ')' && open >= 0 {
			depth--
			if depth == 0 {
				return s[:open] | "(...)" | s[i+1:]
			}
		}
		i++
	}
	return s
}
1281  
1282  // --- text readers ---
1283  
// readToStmtEnd advances the cursor to the end of the current statement and
// returns the text consumed.
//
// The statement ends at the first of:
//   - a newline outside any (), [] or {} group;
//   - a closing bracket that has no matching opener inside the statement
//     (the cursor is left on it);
//   - a `//` comment outside any group: the comment is consumed up to, but
//     not including, its newline and is excluded from the returned text.
//
// String, rune and raw-string literals are skipped as a unit. A `//` comment
// inside a bracket group (for example in a multi-line composite literal or
// closure body) is also skipped as a unit, so brackets and quote characters
// in the comment cannot skew the depth count or start a bogus literal. Such
// a comment stays part of the returned text, because the result is one
// contiguous slice of the source.
func (g *astGen) readToStmtEnd() []byte {
	start := g.pos
	depth := 0
	for g.pos < len(g.src) {
		b := g.src[g.pos]
		if b == '(' || b == '[' || b == '{' {
			depth++
		} else if b == ')' || b == ']' || b == '}' {
			if depth == 0 {
				break
			}
			depth--
		} else if b == '\n' && depth == 0 {
			break
		}
		// Line comments. Literals are skipped whole below, so a "//" seen
		// here always starts a comment.
		if b == '/' && g.pos+1 < len(g.src) && g.src[g.pos+1] == '/' {
			commentStart := g.pos
			for g.pos < len(g.src) && g.src[g.pos] != '\n' {
				g.pos++
			}
			if depth == 0 {
				return g.src[start:commentStart]
			}
			// Nested: the statement continues on the following lines.
			continue
		}
		// Skip string literals
		if b == '"' || b == '\'' || b == '`' {
			g.skipStringLit(b)
			continue
		}
		g.pos++
	}
	return g.src[start:g.pos]
}
1316  
// readToBlockOpen advances the cursor to the first '{' that is not enclosed
// in parentheses and returns the text consumed. The '{' itself is not
// consumed. Line comments and string/rune/raw-string literals are stepped
// over as a unit, so braces and parentheses inside them are ignored. If no
// such '{' exists the cursor ends at end of input.
func (g *astGen) readToBlockOpen() []byte {
	begin := g.pos
	parens := 0
	for g.pos < len(g.src) {
		c := g.src[g.pos]
		if c == '{' && parens == 0 {
			return g.src[begin:g.pos]
		}
		switch {
		case c == '(':
			parens++
		case c == ')':
			parens--
		case c == '/' && g.pos+1 < len(g.src) && g.src[g.pos+1] == '/':
			// Line comment: jump to the newline and rescan from there.
			for g.pos < len(g.src) && g.src[g.pos] != '\n' {
				g.pos++
			}
			continue
		case c == '"' || c == '\'' || c == '`':
			g.skipStringLit(c)
			continue
		}
		g.pos++
	}
	return g.src[begin:g.pos]
}
1346  
// readToCaseEnd reads the remainder of a case clause header and returns the
// text before the ':' that terminates it. The cursor is left just past that
// ':'.
//
// The terminating ':' is the first colon that is outside any (), [] or {}
// group and is not the start of ":=". Colons in slice expressions
// (`case s[1:2]:`), keyed literals (`case T{a: 1}:`) and short variable
// declarations (`case v := <-ch:`) therefore do not end the header.
// String, rune and raw-string literals are skipped as a unit.
//
// If a newline or end of input is reached first, the text read so far is
// returned and the cursor is left on the newline (or at end of input).
func (g *astGen) readToCaseEnd() []byte {
	start := g.pos
	depth := 0
	for g.pos < len(g.src) {
		b := g.src[g.pos]
		if b == '\'' || b == '"' || b == '`' {
			g.skipStringLit(b)
			continue
		}
		if b == '\n' {
			break
		}
		if b == '(' || b == '[' || b == '{' {
			depth++
		} else if b == ')' || b == ']' || b == '}' {
			// Ignore stray closers rather than letting depth go negative.
			if depth > 0 {
				depth--
			}
		} else if b == ':' && depth == 0 {
			if g.pos+1 < len(g.src) && g.src[g.pos+1] == '=' {
				// ":=" belongs to the clause, not its terminator.
				g.pos += 2
				continue
			}
			result := g.src[start:g.pos]
			g.pos++
			return result
		}
		g.pos++
	}
	return g.src[start:g.pos]
}
1367  
// skipToStmtEnd moves the cursor forward to the next newline, or to end of
// input if there is none. The newline itself is not consumed.
func (g *astGen) skipToStmtEnd() {
	for ; g.pos < len(g.src); g.pos++ {
		if g.src[g.pos] == '\n' {
			return
		}
	}
}
1373  
// skipStringLit advances the cursor past a literal whose opening quote is at
// the cursor. quote is that opening character: '"', '\'' or '`'.
//
// Raw (backquoted) literals run to the next '`' and may span lines. For '"'
// and '\'' literals a backslash escapes the following byte, and the literal
// cannot span lines: if a newline is reached before the closing quote the
// literal is treated as unterminated and the cursor is left on the newline,
// so one stray quote cannot swallow the following lines.
//
// The cursor never moves past len(g.src), even when the input ends in the
// middle of an escape sequence.
func (g *astGen) skipStringLit(quote byte) {
	g.pos++ // skip opening quote
	n := len(g.src)
	if quote == '`' {
		for g.pos < n && g.src[g.pos] != '`' {
			g.pos++
		}
	} else {
		for g.pos < n && g.src[g.pos] != quote && g.src[g.pos] != '\n' {
			if g.src[g.pos] == '\\' && g.pos+1 < n && g.src[g.pos+1] != '\n' {
				g.pos += 2 // backslash plus the byte it escapes
			} else {
				g.pos++
			}
		}
	}
	// Consume the closing quote only if the literal was actually terminated.
	if g.pos < n && g.src[g.pos] == quote {
		g.pos++
	}
}
1392  
1393  // --- reference collection ---
1394  
// astBuiltins is the set of predeclared type, constant and function names.
// collectRefsFromText leaves any identifier found here out of the reference
// list it returns.
var astBuiltins = map[string]bool{
	"bool": true, "byte": true, "int": true, "int8": true, "int16": true,
	"int32": true, "int64": true, "uint": true, "uint8": true, "uint16": true,
	"uint32": true, "uint64": true, "float32": true, "float64": true,
	"string": true, "rune": true, "error": true, "any": true,
	"true": true, "false": true, "nil": true,
	"len": true, "cap": true, "append": true, "copy": true, "delete": true,
	"close": true, "panic": true, "recover": true, "print": true, "println": true,
	"make": true, "new": true,
}
1405  
// astKeywords is the set of language keywords. collectRefsFromText leaves
// any identifier found here out of the reference list it returns.
var astKeywords = map[string]bool{
	"func": true, "return": true, "if": true, "else": true, "for": true,
	"range": true, "switch": true, "case": true, "default": true,
	"select": true, "break": true, "continue": true, "defer": true,
	"go": true, "var": true, "const": true, "type": true, "struct": true,
	"interface": true, "map": true, "chan": true, "package": true,
	"import": true, "fallthrough": true, "goto": true,
}
1414  
// collectRefsWithFuncLit collects identifier references from an assignment
// statement, with special ordering when the right-hand side is a function
// literal.
//
// When the text after the first occurrence of assignOp starts with "func("
// or "func (" and contains a '{', references are gathered in this order:
// left-hand side, then the function body (from the first '{' on), then the
// signature (everything before that '{'). Each name is reported once, at
// its first appearance in that order. In every other case the result is
// simply collectRefsFromText(text).
func collectRefsWithFuncLit(text []byte, assignOp string) []string {
	opAt := bytes.Index(text, []byte(assignOp))
	if opAt < 0 {
		return collectRefsFromText(text)
	}

	rhs := bytes.TrimSpace(text[opAt+len(assignOp):])
	isFuncLit := bytes.HasPrefix(rhs, []byte("func(")) || bytes.HasPrefix(rhs, []byte("func ("))
	if !isFuncLit {
		return collectRefsFromText(text)
	}

	brace := bytes.IndexByte(rhs, '{')
	if brace < 0 {
		return collectRefsFromText(text)
	}

	// Visit order: LHS, body, signature.
	sections := [][]byte{text[:opAt], rhs[brace:], rhs[:brace]}

	var refs []string
	seen := map[string]bool{}
	for _, section := range sections {
		for _, name := range collectRefsFromText(section) {
			if seen[name] {
				continue
			}
			seen[name] = true
			refs = append(refs, name)
		}
	}
	return refs
}
1447  
// collectRefsFromText scans text and returns the distinct identifiers it
// contains, in order of first appearance. Names listed in astBuiltins or
// astKeywords are left out.
//
// String, rune and raw-string literals are skipped. A numeric literal is
// skipped as one token: starting at a digit, every following digit, ASCII
// letter, '_' or '.' belongs to it. This covers digit separators (1_000),
// base prefixes (0x, 0o, 0b), exponents (1e9, 0x1p-2) and the imaginary
// suffix, so none of their letters are reported as references.
func collectRefsFromText(text []byte) []string {
	seen := map[string]bool{}
	var refs []string
	i := 0
	for i < len(text) {
		b := text[i]
		// Skip string/char/rune literals
		if b == '"' || b == '\'' || b == '`' {
			i++
			if b == '`' {
				for i < len(text) && text[i] != '`' {
					i++
				}
			} else {
				for i < len(text) && text[i] != b {
					if text[i] == '\\' {
						i++
					}
					i++
				}
			}
			if i < len(text) {
				i++
			}
			continue
		}
		// Skip numeric literals as a whole token.
		if b >= '0' && b <= '9' {
			for i < len(text) {
				c := text[i]
				partOfNumber := c >= '0' && c <= '9' ||
					c >= 'a' && c <= 'z' ||
					c >= 'A' && c <= 'Z' ||
					c == '_' || c == '.'
				if !partOfNumber {
					break
				}
				i++
			}
			continue
		}
		if isIdentStart(b) {
			start := i
			for i < len(text) && isIdent(text[i]) {
				i++
			}
			name := string(text[start:i])
			if !seen[name] && !astBuiltins[name] && !astKeywords[name] {
				seen[name] = true
				refs = append(refs, name)
			}
			continue
		}
		i++
	}
	return refs
}
1497  
// findAssignOp returns the first assignment operator that appears in text
// outside any (), [] or {} group and outside string, rune and raw-string
// literals. It returns "" when there is none.
//
// Recognized operators are ":=", "=", and the compound forms "+=", "-=",
// "*=", "/=", "%=", "&=", "|=", "^=", "<<=", ">>=" and "&^=". The comparison
// operators "==", "!=", "<=" and ">=" are never reported as assignments.
func findAssignOp(text []byte) string {
	// Three-byte operators come first so that "&^=" is not reported as "^=".
	compound := []string{
		"<<=", ">>=", "&^=",
		":=", "+=", "-=", "*=", "/=", "%=", "&=", "|=", "^=",
	}

	depth := 0
	i := 0
	for i < len(text) {
		b := text[i]
		if b == '(' || b == '[' || b == '{' {
			depth++
		} else if b == ')' || b == ']' || b == '}' {
			depth--
		}
		// Skip literals; backslash escapes only apply to " and '.
		if b == '"' || b == '\'' || b == '`' {
			i++
			for i < len(text) && text[i] != b {
				if b != '`' && text[i] == '\\' {
					i++
				}
				i++
			}
			if i < len(text) {
				i++
			}
			continue
		}
		if depth == 0 {
			rest := text[i:]
			for _, op := range compound {
				if bytes.HasPrefix(rest, []byte(op)) {
					return op
				}
			}
			if b == '=' {
				if i+1 < len(text) && text[i+1] == '=' {
					// "==": step over both bytes so the second '=' is not
					// mistaken for a plain assignment.
					i += 2
					continue
				}
				// "!=", "<=" and ">=" are comparisons, not assignments.
				isComparison := i > 0 && (text[i-1] == '!' || text[i-1] == '<' || text[i-1] == '>')
				if !isComparison {
					return "="
				}
			}
		}
		i++
	}
	return ""
}
1576  
// extractLHS returns the left-hand side of an assignment: the part of text
// before the assignment operator op, trimmed, with whitespace around commas
// removed (mxcorpus uses "a,b" not "a, b"). It returns "" when op does not
// occur in text.
//
// The operator is located outside bracket groups and literals, so an
// occurrence of op inside an index expression or a string (for example
// `m["a=b"] = 1` or `x[i == 0] = y`) does not truncate the result. If no
// such top-level occurrence exists, the first textual occurrence is used.
func extractLHS(text []byte, op string) string {
	idx := assignOpIndex(text, op)
	if idx < 0 {
		idx = bytes.Index(text, []byte(op))
	}
	if idx < 0 {
		return ""
	}
	lhs := bytes.TrimSpace(text[:idx])
	parts := bytes.Split(lhs, []byte(","))
	for i := range parts {
		parts[i] = bytes.TrimSpace(parts[i])
	}
	return string(bytes.Join(parts, []byte(",")))
}

// assignOpIndex returns the index of the first occurrence of op in text that
// lies outside every (), [] or {} group and outside string, rune and
// raw-string literals, or -1 if there is none. For op "=" the '=' bytes of
// "==", "!=", "<=" and ">=" are not counted.
func assignOpIndex(text []byte, op string) int {
	depth := 0
	i := 0
	for i < len(text) {
		b := text[i]
		if b == '"' || b == '\'' || b == '`' {
			// Skip the literal; backslash escapes only apply to " and '.
			i++
			for i < len(text) && text[i] != b {
				if b != '`' && text[i] == '\\' {
					i++
				}
				i++
			}
			if i < len(text) {
				i++
			}
			continue
		}
		if b == '(' || b == '[' || b == '{' {
			depth++
		} else if b == ')' || b == ']' || b == '}' {
			depth--
		}
		if depth == 0 && bytes.HasPrefix(text[i:], []byte(op)) {
			if op != "=" {
				return i
			}
			followedByEq := i+1 < len(text) && text[i+1] == '='
			afterCmp := i > 0 && (text[i-1] == '!' || text[i-1] == '<' || text[i-1] == '>' || text[i-1] == '=')
			if !followedByEq && !afterCmp {
				return i
			}
		}
		i++
	}
	return -1
}
1590  
// extractRangeExpr returns the first identifier that follows the first
// "range " in text. If the range operand contains no identifier, the
// trimmed operand text is returned as is. If text has no "range ", the
// result is "".
func extractRangeExpr(text []byte) string {
	keyword := []byte("range ")
	at := bytes.Index(text, keyword)
	if at < 0 {
		return ""
	}
	operand := bytes.TrimSpace(text[at+len(keyword):])

	for i := 0; i < len(operand); i++ {
		if !isIdentStart(operand[i]) {
			continue
		}
		// Found the first identifier: read it to its end and stop.
		end := i
		for end < len(operand) && isIdent(operand[end]) {
			end++
		}
		return string(operand[i:end])
	}
	return string(operand)
}
1616