astgen.mx raw
1 package iskra
2
3 import "bytes"
4
5 // ASTGen produces the same indented text AST dump format as mxcorpus's
6 // astdump.go, but from raw Moxie source text without using go/parser.
7
// SplitDecls splits a source file into individual top-level declarations.
//
// It walks src once, skipping whitespace, comments, the package clause and
// import statements. Every span that begins with func, type, var or const
// and runs up to the offset returned by findDeclEnd is appended to the
// result. The returned slices are sub-slices of src, not copies. A line that
// starts with anything else is skipped.
func SplitDecls(src []byte) [][]byte {
	var decls [][]byte
	i := 0

	for i < len(src) {
		// Skip whitespace and comments
		for i < len(src) {
			if src[i] == ' ' || src[i] == '\t' || src[i] == '\r' || src[i] == '\n' {
				i++
			} else if i+1 < len(src) && src[i] == '/' && src[i+1] == '/' {
				// Line comment: consume it through its newline, then look at
				// the line that follows.
				commentStart := i
				for i < len(src) && src[i] != '\n' {
					i++
				}
				if i < len(src) {
					i++
				}
				// Peek past leading blanks on the next line. Only "is the
				// line empty or not" is tested here; what the line starts
				// with is not examined.
				saved := i
				for i < len(src) && (src[i] == ' ' || src[i] == '\t' || src[i] == '\r') {
					i++
				}
				if i < len(src) && src[i] != '\n' {
					// Non-blank follows. Rewind to the comment and leave this
					// loop; the "//" branch further down consumes the line.
					i = commentStart
					break
				}
				// Blank line (or end of input) follows: keep skipping from
				// just after the comment.
				i = saved
			} else if i+1 < len(src) && src[i] == '/' && src[i+1] == '*' {
				// Block comment: advance past the closing "*/" if there is one.
				i += 2
				for i+1 < len(src) {
					if src[i] == '*' && src[i+1] == '/' {
						i += 2
						break
					}
					i++
				}
			} else {
				break
			}
		}
		if i >= len(src) {
			break
		}

		// Skip package and import statements
		if hasWordAt(src, i, "package") {
			for i < len(src) && src[i] != '\n' {
				i++
			}
			continue
		}
		if hasWordAt(src, i, "import") {
			i += 6 // len("import")
			for i < len(src) && (src[i] == ' ' || src[i] == '\t') {
				i++
			}
			if i < len(src) && src[i] == '(' {
				// Grouped import: skip to just past the matching ')'.
				depth := 0
				for i < len(src) {
					if src[i] == '(' {
						depth++
					} else if src[i] == ')' {
						depth--
						if depth == 0 {
							i++
							break
						}
					}
					i++
				}
			} else {
				// Single import: skip the rest of the line.
				for i < len(src) && src[i] != '\n' {
					i++
				}
			}
			continue
		}

		// Either a declaration keyword, a comment line, or something unknown.
		start := i
		if hasWordAt(src, i, "func") || hasWordAt(src, i, "type") || hasWordAt(src, i, "var") || hasWordAt(src, i, "const") {
			i = findDeclEnd(src, i)
			decls = append(decls, src[start:i])
		} else if src[i] == '/' && i+1 < len(src) && src[i+1] == '/' {
			// Comment line that sits directly above other text: skip it.
			// NOTE(review): start is taken again on the next outer iteration,
			// so the comment text is not part of the slice appended for the
			// declaration that follows.
			for i < len(src) && src[i] != '\n' {
				i++
			}
			if i < len(src) {
				i++
			}
		} else {
			// Skip unknown line
			for i < len(src) && src[i] != '\n' {
				i++
			}
			if i < len(src) {
				i++
			}
		}
	}
	return decls
}
114
115 func hasWordAt(src []byte, pos int, word string) bool {
116 if pos+len(word) > len(src) {
117 return false
118 }
119 if string(src[pos:pos+len(word)]) != word {
120 return false
121 }
122 after := pos + len(word)
123 if after < len(src) && isIdent(src[after]) {
124 return false
125 }
126 return true
127 }
128
129 func findDeclEnd(src []byte, start int) int {
130 i := start
131 // Skip to first { or end of line (for single-line decls)
132 braceDepth := 0
133 parenDepth := 0
134 inString := false
135 var strQuote byte
136
137 for i < len(src) {
138 b := src[i]
139
140 if inString {
141 if b == '\\' && strQuote != '`' {
142 i += 2
143 continue
144 }
145 if b == strQuote {
146 inString = false
147 }
148 i++
149 continue
150 }
151
152 if b == '/' && i+1 < len(src) && src[i+1] == '/' {
153 for i < len(src) && src[i] != '\n' {
154 i++
155 }
156 continue
157 }
158 if b == '/' && i+1 < len(src) && src[i+1] == '*' {
159 i += 2
160 for i+1 < len(src) {
161 if src[i] == '*' && src[i+1] == '/' {
162 i += 2
163 break
164 }
165 i++
166 }
167 continue
168 }
169
170 if b == '"' || b == '\'' || b == '`' {
171 inString = true
172 strQuote = b
173 i++
174 continue
175 }
176
177 if b == '(' {
178 parenDepth++
179 } else if b == ')' {
180 parenDepth--
181 } else if b == '{' {
182 braceDepth++
183 } else if b == '}' {
184 braceDepth--
185 if braceDepth == 0 && parenDepth == 0 {
186 i++
187 return i
188 }
189 } else if b == '\n' && braceDepth == 0 && parenDepth == 0 {
190 // Single-line decl (like var x int)
191 // But check if next line continues (for grouped const/var)
192 if i+1 < len(src) {
193 next := i + 1
194 for next < len(src) && (src[next] == ' ' || src[next] == '\t' || src[next] == '\r') {
195 next++
196 }
197 if next < len(src) && src[next] != '\n' {
198 // Check if this looks like it starts a new top-level decl
199 if hasWordAt(src, next, "func") || hasWordAt(src, next, "type") || hasWordAt(src, next, "var") || hasWordAt(src, next, "const") || (src[next] == '/' && next+1 < len(src) && src[next+1] == '/') {
200 return i
201 }
202 }
203 }
204 // For now, single-line decl ends at newline
205 if braceDepth == 0 && parenDepth == 0 && i > start+4 {
206 return i
207 }
208 }
209 i++
210 }
211 return i
212 }
213
214 // DeclName extracts the name from a declaration.
215 func DeclName(decl []byte) string {
216 g := &astGen{src: decl, pos: 0}
217 g.skipSpaceAndNewlines()
218
219 // Skip doc comments
220 for g.pos < len(g.src) && g.src[g.pos] == '/' {
221 for g.pos < len(g.src) && g.src[g.pos] != '\n' {
222 g.pos++
223 }
224 g.skipSpaceAndNewlines()
225 }
226
227 if g.matchWord("func") {
228 g.skipSpace()
229 if g.peek() == '(' {
230 // Method: func (recv Type) Name(...)
231 g.skipBalanced('(', ')')
232 g.skipSpace()
233 }
234 return g.readIdent()
235 }
236 if g.matchWord("type") {
237 g.skipSpace()
238 return g.readIdent()
239 }
240 if g.matchWord("var") || g.matchWord("const") {
241 g.skipSpace()
242 if g.peek() == '(' {
243 // Grouped - return first name
244 g.pos++
245 g.skipSpaceAndNewlines()
246 return g.readIdent()
247 }
248 return g.readIdent()
249 }
250 return ""
251 }
252
253 // GenAST takes the source of a single declaration and produces an AST dump.
254 func GenAST(src []byte) []byte {
255 g := &astGen{src: src, out: []byte{:0:len(src)}, maxDepth: 8}
256 g.pos = 0
257 g.skipSpace()
258
259 if g.matchWord("func") {
260 g.genFuncDecl()
261 } else if g.matchWord("type") {
262 g.genTypeDecl()
263 } else if g.matchWord("var") {
264 g.genVarDecl()
265 } else if g.matchWord("const") {
266 g.genConstDecl()
267 }
268
269 return g.out
270 }
271
// astGen holds the state of one scanning/dumping pass over a source fragment.
type astGen struct {
	src      []byte // input text being scanned
	pos      int    // index into src of the next unread byte
	out      []byte // dump text accumulated by indent, line and lineRefs
	maxDepth int    // genBlock skips, rather than dumps, blocks nested deeper than this
}
278
279 // --- output helpers ---
280
281 func (g *astGen) indent(depth int) {
282 for i := 0; i < depth; i++ {
283 g.out = append(g.out, ' ', ' ')
284 }
285 }
286
287 func (g *astGen) line(depth int, s string) {
288 g.indent(depth)
289 g.out = append(g.out, s...)
290 g.out = append(g.out, '\n')
291 }
292
293 func (g *astGen) lineRefs(depth int, prefix string, refs []string) {
294 g.indent(depth)
295 g.out = append(g.out, prefix...)
296 if len(refs) > 0 {
297 g.out = append(g.out, " ["...)
298 for i, r := range refs {
299 if i > 0 {
300 g.out = append(g.out, ',')
301 }
302 g.out = append(g.out, r...)
303 }
304 g.out = append(g.out, ']')
305 }
306 g.out = append(g.out, '\n')
307 }
308
309 // --- tokenizer helpers ---
310
311 func (g *astGen) eof() bool { return g.pos >= len(g.src) }
312
313 func (g *astGen) peek() byte {
314 if g.eof() {
315 return 0
316 }
317 return g.src[g.pos]
318 }
319
320 func (g *astGen) next() byte {
321 b := g.src[g.pos]
322 g.pos++
323 return b
324 }
325
326 func (g *astGen) skipSpace() {
327 for g.pos < len(g.src) {
328 b := g.src[g.pos]
329 if b == ' ' || b == '\t' || b == '\r' {
330 g.pos++
331 } else if b == '/' && g.pos+1 < len(g.src) && g.src[g.pos+1] == '/' {
332 for g.pos < len(g.src) && g.src[g.pos] != '\n' {
333 g.pos++
334 }
335 } else if b == '/' && g.pos+1 < len(g.src) && g.src[g.pos+1] == '*' {
336 g.pos += 2
337 for g.pos+1 < len(g.src) {
338 if g.src[g.pos] == '*' && g.src[g.pos+1] == '/' {
339 g.pos += 2
340 break
341 }
342 g.pos++
343 }
344 } else {
345 break
346 }
347 }
348 }
349
350 func (g *astGen) skipSpaceAndNewlines() {
351 for g.pos < len(g.src) {
352 b := g.src[g.pos]
353 if b == ' ' || b == '\t' || b == '\r' || b == '\n' {
354 g.pos++
355 } else if b == '/' && g.pos+1 < len(g.src) && g.src[g.pos+1] == '/' {
356 for g.pos < len(g.src) && g.src[g.pos] != '\n' {
357 g.pos++
358 }
359 } else if b == '/' && g.pos+1 < len(g.src) && g.src[g.pos+1] == '*' {
360 g.pos += 2
361 for g.pos+1 < len(g.src) {
362 if g.src[g.pos] == '*' && g.src[g.pos+1] == '/' {
363 g.pos += 2
364 break
365 }
366 g.pos++
367 }
368 } else {
369 break
370 }
371 }
372 }
373
374 func (g *astGen) matchWord(w string) bool {
375 g.skipSpaceAndNewlines()
376 if g.pos+len(w) > len(g.src) {
377 return false
378 }
379 if string(g.src[g.pos:g.pos+len(w)]) != w {
380 return false
381 }
382 after := g.pos + len(w)
383 if after < len(g.src) && isIdent(g.src[after]) {
384 return false
385 }
386 g.pos = after
387 return true
388 }
389
390 func (g *astGen) readIdent() string {
391 g.skipSpace()
392 start := g.pos
393 for g.pos < len(g.src) && isIdent(g.src[g.pos]) {
394 g.pos++
395 }
396 if g.pos == start {
397 return ""
398 }
399 return string(g.src[start:g.pos])
400 }
401
// isIdent reports whether b is an ASCII letter, an ASCII digit or '_'.
func isIdent(b byte) bool {
	switch {
	case 'a' <= b && b <= 'z':
		return true
	case 'A' <= b && b <= 'Z':
		return true
	case '0' <= b && b <= '9':
		return true
	}
	return b == '_'
}
405
// isIdentStart reports whether b is an ASCII letter or '_', i.e. a byte that
// can begin an identifier. Digits are excluded.
func isIdentStart(b byte) bool {
	switch {
	case 'a' <= b && b <= 'z':
		return true
	case 'A' <= b && b <= 'Z':
		return true
	}
	return b == '_'
}
409
410 // readTypeExpr reads a type expression like []byte, *RuneIter, map[K]V, etc.
411 func (g *astGen) readTypeExpr() string {
412 g.skipSpace()
413 if g.eof() {
414 return ""
415 }
416 b := g.peek()
417
418 if b == '*' {
419 g.pos++
420 return "*" | g.readTypeExpr()
421 }
422 if b == '[' {
423 g.pos++
424 g.skipSpace()
425 if g.peek() == ']' {
426 g.pos++
427 return "[]" | g.readTypeExpr()
428 }
429 // array type [N]T
430 lenExpr := g.readExprStr()
431 g.expect(']')
432 return "[" | lenExpr | "]" | g.readTypeExpr()
433 }
434 if b == '.' && g.pos+2 < len(g.src) && g.src[g.pos+1] == '.' && g.src[g.pos+2] == '.' {
435 g.pos += 3
436 return "..." | g.readTypeExpr()
437 }
438
439 if g.prefixMatch("map[") {
440 g.pos += 4
441 key := g.readTypeExpr()
442 g.expect(']')
443 val := g.readTypeExpr()
444 return "map[" | key | "]" | val
445 }
446 if g.prefixMatch("chan ") {
447 g.pos += 4
448 return "chan " | g.readTypeExpr()
449 }
450 if g.prefixMatch("func(") || g.prefixMatch("func (") {
451 g.pos += 4
452 g.skipSpace()
453 g.skipBalanced('(', ')')
454 g.skipSpace()
455 // Skip optional return type(s)
456 if g.peek() == '(' {
457 g.skipBalanced('(', ')')
458 } else if !g.eof() && g.peek() != ',' && g.peek() != ')' && g.peek() != '{' && g.peek() != '\n' {
459 g.readTypeExpr()
460 }
461 return "func(...)"
462 }
463 if g.prefixMatch("interface{") {
464 g.pos += 9
465 g.skipBalanced('{', '}')
466 return "interface{}"
467 }
468 if g.prefixMatch("struct{") {
469 g.pos += 6
470 g.skipBalanced('{', '}')
471 return "struct{...}"
472 }
473 if g.prefixMatch("struct {") {
474 g.pos += 7
475 g.skipBalanced('{', '}')
476 return "struct{...}"
477 }
478
479 name := g.readIdent()
480 if name == "" {
481 return ""
482 }
483 // Check for pkg.Type
484 g.skipSpace()
485 if g.peek() == '.' {
486 g.pos++
487 sel := g.readIdent()
488 return name | "." | sel
489 }
490 return name
491 }
492
493 func (g *astGen) readExprStr() string {
494 g.skipSpace()
495 start := g.pos
496 depth := 0
497 for g.pos < len(g.src) {
498 b := g.src[g.pos]
499 if b == '(' || b == '[' || b == '{' {
500 depth++
501 } else if b == ')' || b == ']' || b == '}' {
502 if depth == 0 {
503 break
504 }
505 depth--
506 } else if (b == ',' || b == '\n') && depth == 0 {
507 break
508 }
509 g.pos++
510 }
511 return string(bytes.TrimSpace(g.src[start:g.pos]))
512 }
513
514 func (g *astGen) expect(b byte) {
515 g.skipSpace()
516 if g.pos < len(g.src) && g.src[g.pos] == b {
517 g.pos++
518 }
519 }
520
521 func (g *astGen) prefixMatch(s string) bool {
522 if g.pos+len(s) > len(g.src) {
523 return false
524 }
525 return string(g.src[g.pos:g.pos+len(s)]) == s
526 }
527
528 func (g *astGen) skipBalanced(open, close byte) {
529 if g.peek() != open {
530 return
531 }
532 depth := 0
533 for g.pos < len(g.src) {
534 b := g.src[g.pos]
535 if b == '\'' || b == '"' || b == '`' {
536 g.skipStringLit(b)
537 continue
538 }
539 if b == open {
540 depth++
541 } else if b == close {
542 depth--
543 if depth == 0 {
544 g.pos++
545 return
546 }
547 }
548 g.pos++
549 }
550 }
551
552 // --- declaration generators ---
553
554 func (g *astGen) genFuncDecl() {
555 g.skipSpace()
556 // Check for receiver: func (recv Type) Name(...)
557 recv := ""
558 if g.peek() == '(' {
559 saved := g.pos
560 g.pos++
561 g.skipSpace()
562 // Try to read receiver
563 name := g.readIdent()
564 _ = name
565 g.skipSpace()
566 recvType := g.readTypeExpr()
567 g.skipSpace()
568 if g.peek() == ')' {
569 g.pos++
570 recv = recvType
571 } else {
572 g.pos = saved
573 }
574 }
575
576 g.skipSpace()
577 funcName := g.readIdent()
578
579 header := "FuncDecl " | funcName
580 if recv != "" {
581 header = header | " recv=" | recv
582 }
583 g.line(0, header)
584
585 // Parse params
586 g.skipSpace()
587 if g.peek() == '(' {
588 params := g.parseFieldList(false)
589 if len(params) > 0 {
590 g.line(1, "Params")
591 for _, f := range params {
592 g.line(2, f)
593 }
594 }
595 }
596
597 // Parse results
598 g.skipSpace()
599 if g.peek() == '(' {
600 results := g.parseFieldList(true)
601 if len(results) > 0 {
602 g.line(1, "Results")
603 for _, f := range results {
604 g.line(2, f)
605 }
606 }
607 } else if g.peek() != '{' && !g.eof() {
608 // Single unnamed result
609 typeName := g.readTypeExpr()
610 if typeName != "" {
611 g.line(1, "Results")
612 g.line(2, typeName)
613 }
614 }
615
616 // Parse body
617 g.skipSpaceAndNewlines()
618 if g.peek() == '{' {
619 g.genBlock(1)
620 }
621 }
622
623 func (g *astGen) parseFieldList(typesOnly bool) []string {
624 g.expect('(')
625 var fields []string
626 for {
627 g.skipSpaceAndNewlines()
628 if g.peek() == ')' {
629 g.pos++
630 break
631 }
632 if g.eof() {
633 break
634 }
635
636 if typesOnly {
637 typeName := g.readTypeExpr()
638 if typeName != "" {
639 fields = append(fields, typeName)
640 }
641 g.skipSpace()
642 if g.peek() == ',' {
643 g.pos++
644 }
645 continue
646 }
647
648 // Try: name[,name...] type or just type
649 saved := g.pos
650 var names []string
651 for {
652 name := g.readIdent()
653 if name == "" {
654 break
655 }
656 names = append(names, name)
657 g.skipSpace()
658 if g.peek() == ',' {
659 g.pos++
660 g.skipSpace()
661 } else {
662 break
663 }
664 }
665
666 g.skipSpace()
667 p := g.peek()
668 // Single dot after a name = qualified type (pkg.Type), not a param name.
669 // Three dots = variadic (...type), treat as "name type" where the type starts with ...
670 if len(names) == 1 && p == '.' && !(g.pos+2 < len(g.src) && g.src[g.pos+1] == '.' && g.src[g.pos+2] == '.') {
671 g.pos = saved
672 typeName := g.readTypeExpr()
673 if typeName != "" {
674 fields = append(fields, typeName)
675 }
676 } else if len(names) > 0 && (isIdentStart(p) || p == '*' || p == '[' || p == '.' || p == 'm' || p == 'c' || p == 'f' || p == 'i' || p == 's') {
677 typeName := g.readTypeExpr()
678 if typeName != "" {
679 nameStr := ""
680 for i, n := range names {
681 if i > 0 {
682 nameStr = nameStr | ","
683 }
684 nameStr = nameStr | n
685 }
686 fields = append(fields, nameStr | " " | typeName)
687 } else {
688 for _, n := range names {
689 fields = append(fields, n)
690 }
691 }
692 } else if len(names) > 0 {
693 for _, n := range names {
694 fields = append(fields, n)
695 }
696 } else {
697 g.pos = saved
698 typeName := g.readTypeExpr()
699 if typeName != "" {
700 fields = append(fields, typeName)
701 }
702 }
703
704 g.skipSpace()
705 if g.peek() == ',' {
706 g.pos++
707 }
708 }
709 return fields
710 }
711
712 func (g *astGen) genTypeDecl() {
713 g.line(0, "GenDecl type")
714 g.skipSpaceAndNewlines()
715 if g.peek() == '(' {
716 // Grouped type declaration
717 g.pos++
718 for {
719 g.skipSpaceAndNewlines()
720 if g.peek() == ')' {
721 g.pos++
722 break
723 }
724 if g.eof() {
725 break
726 }
727 g.genTypeSpec(1)
728 }
729 } else {
730 g.genTypeSpec(1)
731 }
732 }
733
734 func (g *astGen) genTypeSpec(depth int) {
735 name := g.readIdent()
736 if name == "" {
737 return
738 }
739 g.skipSpace()
740
741 // Check if it's a struct - handle specially to preserve field info
742 if g.prefixMatch("struct") {
743 g.pos += 6
744 g.skipSpaceAndNewlines()
745 if g.peek() == '{' {
746 g.line(depth, "Type " | name | " struct{...}")
747 // Parse struct fields from the body
748 g.pos++ // skip {
749 for {
750 g.skipSpaceAndNewlines()
751 if g.peek() == '}' {
752 g.pos++
753 break
754 }
755 if g.eof() {
756 break
757 }
758 fg := &astGen{src: g.src, pos: g.pos}
759 fieldName := fg.readIdent()
760 if fieldName == "" {
761 // Skip line
762 for g.pos < len(g.src) && g.src[g.pos] != '\n' {
763 g.pos++
764 }
765 continue
766 }
767 fg.skipSpace()
768 fieldType := fg.readTypeExpr()
769 g.pos = fg.pos
770 if fieldType != "" {
771 g.line(depth+1, fieldName | " " | fieldType)
772 } else {
773 g.line(depth+1, fieldName)
774 }
775 // Skip to end of line (tags, comments)
776 for g.pos < len(g.src) && g.src[g.pos] != '\n' {
777 g.pos++
778 }
779 }
780 return
781 }
782 }
783
784 typeExpr := g.readTypeExpr()
785 g.line(depth, "Type " | name | " " | typeExpr)
786 }
787
788 func (g *astGen) genVarDecl() {
789 g.line(0, "GenDecl var")
790 g.skipSpaceAndNewlines()
791 if g.peek() == '(' {
792 g.pos++
793 for {
794 g.skipSpaceAndNewlines()
795 if g.peek() == ')' {
796 g.pos++
797 break
798 }
799 if g.eof() {
800 break
801 }
802 g.genValueSpec(1)
803 }
804 } else {
805 g.genValueSpec(1)
806 }
807 }
808
809 func (g *astGen) genConstDecl() {
810 g.line(0, "GenDecl const")
811 g.skipSpaceAndNewlines()
812 if g.peek() == '(' {
813 g.pos++
814 for {
815 g.skipSpaceAndNewlines()
816 if g.peek() == ')' {
817 g.pos++
818 break
819 }
820 if g.eof() {
821 break
822 }
823 g.genValueSpec(1)
824 }
825 } else {
826 g.genValueSpec(1)
827 }
828 }
829
830 func (g *astGen) genValueSpec(depth int) {
831 var names []string
832 for {
833 name := g.readIdent()
834 if name == "" {
835 break
836 }
837 names = append(names, name)
838 g.skipSpace()
839 if g.peek() == ',' {
840 g.pos++
841 g.skipSpace()
842 } else {
843 break
844 }
845 }
846 if len(names) == 0 {
847 // Skip to next line
848 for g.pos < len(g.src) && g.src[g.pos] != '\n' {
849 g.pos++
850 }
851 return
852 }
853
854 nameStr := ""
855 for i, n := range names {
856 if i > 0 {
857 nameStr = nameStr | ","
858 }
859 nameStr = nameStr | n
860 }
861
862 // Check for type
863 g.skipSpace()
864 typ := ""
865 p := g.peek()
866 if isIdentStart(p) || p == '*' || p == '[' {
867 // Could be type or = sign
868 saved := g.pos
869 if p != '=' {
870 tryType := g.readTypeExpr()
871 g.skipSpace()
872 if g.peek() == '=' || g.peek() == '\n' || g.eof() || g.peek() == ')' {
873 typ = tryType
874 } else {
875 g.pos = saved
876 }
877 }
878 }
879
880 if typ != "" {
881 g.line(depth, "Value " | nameStr | " " | typ)
882 } else {
883 g.line(depth, "Value " | nameStr)
884 }
885
886 // Skip rest of line (value expression), respecting nested parens
887 parenD := 0
888 for g.pos < len(g.src) {
889 b := g.src[g.pos]
890 if b == '(' {
891 parenD++
892 } else if b == ')' {
893 if parenD == 0 {
894 break
895 }
896 parenD--
897 } else if b == '\n' && parenD == 0 {
898 break
899 }
900 g.pos++
901 }
902 }
903
904 // --- block and statement generators ---
905
906 func (g *astGen) genBlock(depth int) {
907 if depth > g.maxDepth {
908 g.skipBalanced('{', '}')
909 return
910 }
911 g.line(depth, "Block")
912 g.expect('{')
913 iters := 0
914 for {
915 g.skipSpaceAndNewlines()
916 if g.peek() == '}' {
917 g.pos++
918 return
919 }
920 if g.eof() {
921 return
922 }
923 iters++
924 if iters > 500 {
925 g.skipBalanced('{', '}')
926 return
927 }
928 saved := g.pos
929 g.genStmt(depth + 1)
930 if g.pos == saved {
931 g.skipToStmtEnd()
932 if g.pos == saved {
933 g.pos++
934 }
935 }
936 }
937 }
938
939 func (g *astGen) genStmt(depth int) {
940 g.skipSpaceAndNewlines()
941 if g.eof() || g.peek() == '}' {
942 return
943 }
944
945 // Try keywords first
946 if g.matchWord("return") {
947 g.genReturn(depth)
948 } else if g.matchWord("if") {
949 g.genIf(depth)
950 } else if g.matchWord("for") {
951 g.genFor(depth)
952 } else if g.matchWord("switch") {
953 g.genSwitch(depth)
954 } else if g.matchWord("select") {
955 g.genSelect(depth)
956 } else if g.matchWord("case") {
957 g.genCase(depth)
958 } else if g.matchWord("default") {
959 g.genDefault(depth)
960 } else if g.matchWord("break") {
961 g.line(depth, "break")
962 g.skipToStmtEnd()
963 } else if g.matchWord("continue") {
964 g.line(depth, "continue")
965 g.skipToStmtEnd()
966 } else if g.matchWord("defer") {
967 g.genDefer(depth)
968 } else if g.matchWord("var") {
969 g.genVarStmt(depth)
970 } else if g.matchWord("type") {
971 g.genTypeStmt(depth)
972 } else if g.matchWord("const") {
973 g.genConstStmt(depth)
974 } else if g.matchWord("go") {
975 g.genGoStmt(depth)
976 } else {
977 // Assignment or expression statement
978 g.genAssignOrExpr(depth)
979 }
980 }
981
982 func (g *astGen) genReturn(depth int) {
983 g.skipSpace()
984 if g.peek() == '\n' || g.peek() == '}' || g.eof() {
985 g.line(depth, "Return")
986 return
987 }
988 // Collect refs from return expressions
989 stmtText := g.readToStmtEnd()
990 refs := collectRefsFromText(stmtText)
991 g.lineRefs(depth, "Return", refs)
992 }
993
994 func (g *astGen) genIf(depth int) {
995 // Read the condition (everything up to the opening {)
996 condText := g.readToBlockOpen()
997 refs := collectRefsFromText(condText)
998 g.lineRefs(depth, "If", refs)
999
1000 g.skipSpaceAndNewlines()
1001 if g.peek() == '{' {
1002 g.genBlock(depth + 1)
1003 }
1004
1005 // Check for else
1006 g.skipSpaceAndNewlines()
1007 if g.matchWord("else") {
1008 g.skipSpaceAndNewlines()
1009 if g.matchWord("if") {
1010 g.line(depth, "Else")
1011 g.genIf(depth + 1)
1012 } else {
1013 g.line(depth, "Else")
1014 if g.peek() == '{' {
1015 g.genBlock(depth + 1)
1016 }
1017 }
1018 }
1019 }
1020
1021 func (g *astGen) genFor(depth int) {
1022 g.skipSpace()
1023 if g.peek() == '{' {
1024 g.line(depth, "For")
1025 g.genBlock(depth + 1)
1026 return
1027 }
1028
1029 // Check for range
1030 saved := g.pos
1031 condText := g.readToBlockOpen()
1032 if bytes.Contains(condText, []byte("range ")) || bytes.HasPrefix(bytes.TrimSpace(condText), []byte("range ")) {
1033 rangeExpr := extractRangeExpr(condText)
1034 rangeIdx := bytes.Index(condText, []byte("range "))
1035 var refs []string
1036 if rangeIdx >= 0 {
1037 afterRange := condText[rangeIdx+6:]
1038 refs = append(refs, collectRefsFromText(afterRange)...)
1039 beforeRange := condText[:rangeIdx]
1040 refs = append(refs, collectRefsFromText(beforeRange)...)
1041 } else {
1042 refs = collectRefsFromText(condText)
1043 }
1044 if rangeExpr != "" {
1045 g.lineRefs(depth, "Range " | rangeExpr, refs)
1046 } else {
1047 g.lineRefs(depth, "Range", refs)
1048 }
1049 g.skipSpaceAndNewlines()
1050 if g.peek() == '{' {
1051 g.genBlock(depth + 1)
1052 }
1053 return
1054 }
1055
1056 // Regular for loop
1057 _ = saved
1058 refs := collectRefsFromText(condText)
1059 g.lineRefs(depth, "For", refs)
1060
1061 g.skipSpaceAndNewlines()
1062 if g.peek() == '{' {
1063 g.genBlock(depth + 1)
1064 }
1065 }
1066
1067 func (g *astGen) genSwitch(depth int) {
1068 g.skipSpace()
1069 if g.peek() == '{' {
1070 g.line(depth, "Switch")
1071 g.genBlock(depth + 1)
1072 return
1073 }
1074
1075 condText := g.readToBlockOpen()
1076 refs := collectRefsFromText(condText)
1077 g.lineRefs(depth, "Switch", refs)
1078
1079 g.skipSpaceAndNewlines()
1080 if g.peek() == '{' {
1081 g.genBlock(depth + 1)
1082 }
1083 }
1084
1085 func (g *astGen) genSelect(depth int) {
1086 g.line(depth, "Select")
1087 g.skipSpaceAndNewlines()
1088 if g.peek() == '{' {
1089 g.genBlock(depth + 1)
1090 }
1091 }
1092
1093 func (g *astGen) genCase(depth int) {
1094 g.skipSpace()
1095 caseText := g.readToCaseEnd()
1096 refs := collectRefsFromText(caseText)
1097 g.lineRefs(depth, "Case", refs)
1098
1099 for {
1100 g.skipSpaceAndNewlines()
1101 p := g.peek()
1102 if p == '}' || g.eof() {
1103 break
1104 }
1105 saved := g.pos
1106 if g.matchWord("case") || g.matchWord("default") {
1107 g.pos = saved
1108 break
1109 }
1110 g.pos = saved
1111 g.genStmt(depth + 1)
1112 if g.pos == saved {
1113 g.skipToStmtEnd()
1114 if g.pos == saved {
1115 g.pos++
1116 }
1117 }
1118 }
1119 }
1120
1121 func (g *astGen) genDefault(depth int) {
1122 g.line(depth, "Default")
1123 g.skipSpace()
1124 if g.peek() == ':' {
1125 g.pos++
1126 }
1127 for {
1128 g.skipSpaceAndNewlines()
1129 p := g.peek()
1130 if p == '}' || g.eof() {
1131 break
1132 }
1133 saved := g.pos
1134 if g.matchWord("case") || g.matchWord("default") {
1135 g.pos = saved
1136 break
1137 }
1138 g.pos = saved
1139 g.genStmt(depth + 1)
1140 if g.pos == saved {
1141 g.skipToStmtEnd()
1142 if g.pos == saved {
1143 g.pos++
1144 }
1145 }
1146 }
1147 }
1148
1149 func (g *astGen) genDefer(depth int) {
1150 g.skipSpace()
1151 stmtText := g.readToStmtEnd()
1152 // Extract function name from call
1153 funcName := ""
1154 parenIdx := bytes.IndexByte(stmtText, '(')
1155 if parenIdx > 0 {
1156 funcName = string(bytes.TrimSpace(stmtText[:parenIdx]))
1157 }
1158 refs := collectRefsFromText(stmtText)
1159 if funcName != "" {
1160 g.lineRefs(depth, "Defer " | funcName, refs)
1161 } else {
1162 g.lineRefs(depth, "Defer", refs)
1163 }
1164 }
1165
1166 func (g *astGen) genGoStmt(depth int) {
1167 g.skipSpace()
1168 stmtText := g.readToStmtEnd()
1169 funcName := ""
1170 parenIdx := bytes.IndexByte(stmtText, '(')
1171 if parenIdx > 0 {
1172 funcName = string(bytes.TrimSpace(stmtText[:parenIdx]))
1173 }
1174 refs := collectRefsFromText(stmtText)
1175 if funcName != "" {
1176 g.lineRefs(depth, "Go " | funcName, refs)
1177 } else {
1178 g.lineRefs(depth, "Go", refs)
1179 }
1180 }
1181
1182 func (g *astGen) genVarStmt(depth int) {
1183 g.line(depth, "GenDecl var")
1184 g.skipSpaceAndNewlines()
1185 if g.peek() == '(' {
1186 g.pos++
1187 for {
1188 g.skipSpaceAndNewlines()
1189 if g.peek() == ')' {
1190 g.pos++
1191 break
1192 }
1193 if g.eof() {
1194 break
1195 }
1196 g.genValueSpec(depth + 1)
1197 }
1198 } else {
1199 g.genValueSpec(depth + 1)
1200 }
1201 }
1202
1203 func (g *astGen) genTypeStmt(depth int) {
1204 g.line(depth, "GenDecl type")
1205 g.skipSpaceAndNewlines()
1206 g.genTypeSpec(depth + 1)
1207 }
1208
1209 func (g *astGen) genConstStmt(depth int) {
1210 g.line(depth, "GenDecl const")
1211 g.skipSpaceAndNewlines()
1212 if g.peek() == '(' {
1213 g.pos++
1214 for {
1215 g.skipSpaceAndNewlines()
1216 if g.peek() == ')' {
1217 g.pos++
1218 break
1219 }
1220 if g.eof() {
1221 break
1222 }
1223 g.genValueSpec(depth + 1)
1224 }
1225 } else {
1226 g.genValueSpec(depth + 1)
1227 }
1228 }
1229
1230 func (g *astGen) genAssignOrExpr(depth int) {
1231 // Read the full statement text
1232 stmtStart := g.pos
1233 stmtText := g.readToStmtEnd()
1234
1235 // Check if this is an assignment
1236 assignOp := findAssignOp(stmtText)
1237 if assignOp != "" {
1238 lhs := extractLHS(stmtText, assignOp)
1239 refs := collectRefsWithFuncLit(stmtText, assignOp)
1240 g.lineRefs(depth, "Assign " | lhs | " " | assignOp, refs)
1241 return
1242 }
1243
1244 // Check for inc/dec
1245 trimmed := bytes.TrimSpace(stmtText)
1246 if len(trimmed) >= 2 {
1247 suffix := string(trimmed[len(trimmed)-2:])
1248 if suffix == "++" || suffix == "--" {
1249 exprPart := string(bytes.TrimSpace(trimmed[:len(trimmed)-2]))
1250 refs := collectRefsFromText(stmtText)
1251 g.lineRefs(depth, exprPart | " " | suffix, refs)
1252 return
1253 }
1254 }
1255
1256 // Expression statement
1257 _ = stmtStart
1258 refs := collectRefsFromText(stmtText)
1259 exprName := normalizeCallArgs(string(bytes.TrimSpace(stmtText)))
1260 g.lineRefs(depth, "Expr " | exprName, refs)
1261 }
1262
1263 func normalizeCallArgs(s string) string {
1264 idx := bytes.IndexByte([]byte(s), '(')
1265 if idx < 0 {
1266 return s
1267 }
1268 depth := 0
1269 for i := idx; i < len(s); i++ {
1270 if s[i] == '(' {
1271 depth++
1272 } else if s[i] == ')' {
1273 depth--
1274 if depth == 0 {
1275 return s[:idx] | "(...)" | s[i+1:]
1276 }
1277 }
1278 }
1279 return s
1280 }
1281
1282 // --- text readers ---
1283
1284 func (g *astGen) readToStmtEnd() []byte {
1285 start := g.pos
1286 depth := 0
1287 for g.pos < len(g.src) {
1288 b := g.src[g.pos]
1289 if b == '(' || b == '[' || b == '{' {
1290 depth++
1291 } else if b == ')' || b == ']' || b == '}' {
1292 if depth == 0 {
1293 break
1294 }
1295 depth--
1296 } else if b == '\n' && depth == 0 {
1297 break
1298 }
1299 // Stop at line comments
1300 if b == '/' && g.pos+1 < len(g.src) && g.src[g.pos+1] == '/' && depth == 0 {
1301 result := g.src[start:g.pos]
1302 for g.pos < len(g.src) && g.src[g.pos] != '\n' {
1303 g.pos++
1304 }
1305 return result
1306 }
1307 // Skip string literals
1308 if b == '"' || b == '\'' || b == '`' {
1309 g.skipStringLit(b)
1310 continue
1311 }
1312 g.pos++
1313 }
1314 return g.src[start:g.pos]
1315 }
1316
1317 func (g *astGen) readToBlockOpen() []byte {
1318 start := g.pos
1319 depth := 0
1320 for g.pos < len(g.src) {
1321 b := g.src[g.pos]
1322 if b == '{' && depth == 0 {
1323 break
1324 }
1325 if b == '(' {
1326 depth++
1327 } else if b == ')' {
1328 depth--
1329 }
1330 // Skip comments
1331 if b == '/' && g.pos+1 < len(g.src) && g.src[g.pos+1] == '/' {
1332 for g.pos < len(g.src) && g.src[g.pos] != '\n' {
1333 g.pos++
1334 }
1335 continue
1336 }
1337 // Skip string/char literals
1338 if b == '"' || b == '\'' || b == '`' {
1339 g.skipStringLit(b)
1340 continue
1341 }
1342 g.pos++
1343 }
1344 return g.src[start:g.pos]
1345 }
1346
1347 func (g *astGen) readToCaseEnd() []byte {
1348 start := g.pos
1349 for g.pos < len(g.src) {
1350 b := g.src[g.pos]
1351 if b == '\'' || b == '"' || b == '`' {
1352 g.skipStringLit(b)
1353 continue
1354 }
1355 if b == ':' {
1356 result := g.src[start:g.pos]
1357 g.pos++
1358 return result
1359 }
1360 if b == '\n' {
1361 break
1362 }
1363 g.pos++
1364 }
1365 return g.src[start:g.pos]
1366 }
1367
1368 func (g *astGen) skipToStmtEnd() {
1369 for g.pos < len(g.src) && g.src[g.pos] != '\n' {
1370 g.pos++
1371 }
1372 }
1373
1374 func (g *astGen) skipStringLit(quote byte) {
1375 g.pos++ // skip opening quote
1376 if quote == '`' {
1377 for g.pos < len(g.src) && g.src[g.pos] != '`' {
1378 g.pos++
1379 }
1380 } else {
1381 for g.pos < len(g.src) && g.src[g.pos] != quote {
1382 if g.src[g.pos] == '\\' {
1383 g.pos++
1384 }
1385 g.pos++
1386 }
1387 }
1388 if g.pos < len(g.src) {
1389 g.pos++
1390 }
1391 }
1392
1393 // --- reference collection ---
1394
// astBuiltins is the set of predeclared names that collectRefsFromText
// leaves out of its result.
var astBuiltins = map[string]bool{
	// Types
	"bool":    true,
	"byte":    true,
	"int":     true,
	"int8":    true,
	"int16":   true,
	"int32":   true,
	"int64":   true,
	"uint":    true,
	"uint8":   true,
	"uint16":  true,
	"uint32":  true,
	"uint64":  true,
	"float32": true,
	"float64": true,
	"string":  true,
	"rune":    true,
	"error":   true,
	"any":     true,

	// Constants and the zero value
	"true":  true,
	"false": true,
	"nil":   true,

	// Functions
	"len":     true,
	"cap":     true,
	"append":  true,
	"copy":    true,
	"delete":  true,
	"close":   true,
	"panic":   true,
	"recover": true,
	"print":   true,
	"println": true,
	"make":    true,
	"new":     true,
}
1405
// astKeywords is the set of language keywords that collectRefsFromText
// leaves out of its result.
var astKeywords = map[string]bool{
	// Declarations and file structure
	"package":   true,
	"import":    true,
	"func":      true,
	"var":       true,
	"const":     true,
	"type":      true,
	"struct":    true,
	"interface": true,
	"map":       true,
	"chan":      true,

	// Control flow
	"return":      true,
	"if":          true,
	"else":        true,
	"for":         true,
	"range":       true,
	"switch":      true,
	"case":        true,
	"default":     true,
	"select":      true,
	"break":       true,
	"continue":    true,
	"fallthrough": true,
	"goto":        true,
	"defer":       true,
	"go":          true,
}
1414
1415 func collectRefsWithFuncLit(text []byte, assignOp string) []string {
1416 opIdx := bytes.Index(text, []byte(assignOp))
1417 if opIdx < 0 {
1418 return collectRefsFromText(text)
1419 }
1420 rhs := bytes.TrimSpace(text[opIdx+len(assignOp):])
1421 if !bytes.HasPrefix(rhs, []byte("func(")) && !bytes.HasPrefix(rhs, []byte("func (")) {
1422 return collectRefsFromText(text)
1423 }
1424 bodyStart := bytes.IndexByte(rhs, '{')
1425 if bodyStart < 0 {
1426 return collectRefsFromText(text)
1427 }
1428 lhsPart := text[:opIdx]
1429 paramPart := rhs[:bodyStart]
1430 bodyPart := rhs[bodyStart:]
1431
1432 seen := map[string]bool{}
1433 var refs []string
1434 addRefs := func(part []byte) {
1435 for _, r := range collectRefsFromText(part) {
1436 if !seen[r] {
1437 seen[r] = true
1438 refs = append(refs, r)
1439 }
1440 }
1441 }
1442 addRefs(lhsPart)
1443 addRefs(bodyPart)
1444 addRefs(paramPart)
1445 return refs
1446 }
1447
1448 func collectRefsFromText(text []byte) []string {
1449 seen := map[string]bool{}
1450 var refs []string
1451 i := 0
1452 for i < len(text) {
1453 b := text[i]
1454 // Skip string/char/rune literals
1455 if b == '"' || b == '\'' || b == '`' {
1456 i++
1457 if b == '`' {
1458 for i < len(text) && text[i] != '`' {
1459 i++
1460 }
1461 } else {
1462 for i < len(text) && text[i] != b {
1463 if text[i] == '\\' {
1464 i++
1465 }
1466 i++
1467 }
1468 }
1469 if i < len(text) {
1470 i++
1471 }
1472 continue
1473 }
1474 // Skip numbers
1475 if b >= '0' && b <= '9' {
1476 for i < len(text) && (text[i] >= '0' && text[i] <= '9' || text[i] == 'x' || text[i] == 'X' || text[i] >= 'a' && text[i] <= 'f' || text[i] >= 'A' && text[i] <= 'F' || text[i] == '.') {
1477 i++
1478 }
1479 continue
1480 }
1481 if isIdentStart(b) {
1482 start := i
1483 for i < len(text) && isIdent(text[i]) {
1484 i++
1485 }
1486 name := string(text[start:i])
1487 if !seen[name] && !astBuiltins[name] && !astKeywords[name] {
1488 seen[name] = true
1489 refs = append(refs, name)
1490 }
1491 continue
1492 }
1493 i++
1494 }
1495 return refs
1496 }
1497
// findAssignOp returns the first assignment operator in text that is not
// nested inside (), [] or {} and not inside a string, rune or raw-string
// literal. It returns "" when there is none.
//
// Recognized operators are ":=", "=", and the compound forms "+=", "-=",
// "*=", "/=", "%=", "&=", "|=", "^=", "<<=", ">>=" and "&^=". The comparison
// operators "==", "!=", "<=" and ">=" are never mistaken for an assignment.
func findAssignOp(text []byte) string {
	// Three-byte operators come first so that "<<=" is not read as "<"
	// followed by "<=", and "&^=" is not read as "&" followed by "^=".
	assignOps := []string{
		"<<=", ">>=", "&^=",
		":=", "+=", "-=", "*=", "/=", "%=", "&=", "|=", "^=",
	}
	compareOps := []string{"==", "!=", "<=", ">="}

	hasOpAt := func(i int, op string) bool {
		return len(text)-i >= len(op) && string(text[i:i+len(op)]) == op
	}

	depth := 0
	i := 0
	for i < len(text) {
		b := text[i]
		switch b {
		case '(', '[', '{':
			depth++
		case ')', ']', '}':
			depth--
		case '"', '\'', '`':
			// Skip the whole literal; backslash escapes apply only to
			// interpreted literals, not raw strings.
			i++
			for i < len(text) && text[i] != b {
				if b != '`' && text[i] == '\\' {
					i++
				}
				i++
			}
			if i < len(text) {
				i++ // closing delimiter
			}
			continue
		}

		if depth == 0 {
			for _, op := range assignOps {
				if hasOpAt(i, op) {
					return op
				}
			}
			// Step over a comparison as a unit so neither of its bytes is
			// later taken for a plain '='.
			isCompare := false
			for _, op := range compareOps {
				if hasOpAt(i, op) {
					i += len(op)
					isCompare = true
					break
				}
			}
			if isCompare {
				continue
			}
			if b == '=' {
				return "="
			}
		}
		i++
	}
	return ""
}
1576
// extractLHS returns the left-hand side of an assignment: the part of text
// before the operator op, trimmed, with whitespace around commas removed
// (mxcorpus uses "a,b" not "a, b"). It returns "" when op does not occur in
// text.
//
// The operator is located at its first occurrence outside string/rune
// literals and outside any (), [] or {} nesting, so an operator-like byte
// sequence inside an index expression or map key (for example
// m["k=v"] = 1) does not truncate the result. If no such top-level
// occurrence exists, the first textual occurrence is used instead.
func extractLHS(text []byte, op string) string {
	if op == "" {
		// An empty operator matches at offset 0, leaving an empty LHS.
		return ""
	}
	opBytes := []byte(op)

	idx := -1
	depth := 0
	for i := 0; i < len(text) && idx < 0; i++ {
		switch b := text[i]; b {
		case '"', '\'', '`':
			// Advance to the closing delimiter; the outer i++ steps past it.
			// Backslash escapes apply only to interpreted literals.
			for i++; i < len(text) && text[i] != b; i++ {
				if b != '`' && text[i] == '\\' {
					i++
				}
			}
		case '(', '[', '{':
			depth++
		case ')', ']', '}':
			depth--
		default:
			if depth == 0 && bytes.HasPrefix(text[i:], opBytes) {
				idx = i
			}
		}
	}
	if idx < 0 {
		idx = bytes.Index(text, opBytes)
	}
	if idx < 0 {
		return ""
	}

	fields := bytes.Split(bytes.TrimSpace(text[:idx]), []byte(","))
	for k := range fields {
		fields[k] = bytes.TrimSpace(fields[k])
	}
	return string(bytes.Join(fields, []byte(",")))
}
1590
1591 func extractRangeExpr(text []byte) string {
1592 idx := bytes.Index(text, []byte("range "))
1593 if idx < 0 {
1594 return ""
1595 }
1596 rest := bytes.TrimSpace(text[idx+6:])
1597 // Extract the identifier refs from the range expression
1598 var parts []string
1599 i := 0
1600 for i < len(rest) {
1601 if isIdentStart(rest[i]) {
1602 start := i
1603 for i < len(rest) && isIdent(rest[i]) {
1604 i++
1605 }
1606 parts = append(parts, string(rest[start:i]))
1607 continue
1608 }
1609 i++
1610 }
1611 if len(parts) > 0 {
1612 return parts[0]
1613 }
1614 return string(rest)
1615 }
1616