mxrewrite.go raw
1 package loader
2
3 // Moxie source rewrites.
4 //
5 // These transforms run before or during the parse/typecheck pipeline to
6 // bridge Moxie syntax to what Go's parser and type checker accept.
7 //
8 // 1. rewriteChanLiterals: text-level rewrite before parsing.
9 // chan T{} → make(chan T)
10 // chan T{N} → make(chan T, N)
11 //
12 // 1b. rewriteSliceLiterals: text-level rewrite before parsing.
13 // []T{:len} → make([]T, len)
14 // []T{:len:cap} → make([]T, len, cap)
15 //
16 // 2. rewriteStringLiterals: AST-level rewrite after parsing, before typecheck.
17 // "hello" → []byte("hello")
18 // "a" + "b" → []byte("a" + "b")
19 //
20 // 3. rewritePipeConcat: AST-level rewrite after first typecheck pass.
21 // a | b (where both are []byte) → __moxie_concat(a, b)
22
23 import (
24 "bytes"
25 "go/ast"
26 "go/scanner"
27 "go/token"
28 "go/types"
29 "strings"
30 )
31
// isMoxieStringTarget reports whether a package should get string→[]byte
// rewrites (string literal wrapping, | concat, comparison rewrites).
//
// Permanently exempt packages implement low-level primitives or
// syscall interfaces that require native Go string/uintptr types.
//
// Matching is per path segment: a prefix p exempts p itself and anything
// under p/, but not unrelated packages that merely share leading
// characters (e.g. "osext" is NOT exempted by "os").
func isMoxieStringTarget(importPath string) bool {
	exempt := []string{
		"runtime",              // language primitives
		"internal/task",        // cooperative scheduler
		"internal/abi",         // ABI type descriptors
		"internal/reflectlite", // type reflection
		"internal/itoa",        // used by reflectlite, returns string
		"syscall",              // syscall interfaces
		"internal/syscall",     // syscall internals
		"os",                   // FDs, syscall wrappers
		"unsafe",               // language primitive
		"reflect",              // must handle all Go types
	}
	for _, p := range exempt {
		if importPath == p || strings.HasPrefix(importPath, p+"/") {
			return false
		}
	}
	return true
}
52
53 // ---------------------------------------------------------------------------
54 // 1. Channel literal rewrite (text-level, before parsing)
55 // ---------------------------------------------------------------------------
56
// rewriteChanLiterals scans source bytes for channel literal syntax and
// rewrites it to make(chan T) calls that Go's parser accepts.
//
// Patterns:
//
//	chan T{}  → (make)(chan T)
//	chan T{N} → (make)(chan T, N)
//
// The rewrite is token-aware: it uses go/scanner to avoid matching inside
// strings or comments. It only rewrites when 'chan' is followed by a type
// expression and then '{' in expression context.
func rewriteChanLiterals(src []byte, fset *token.FileSet) []byte {
	// Tokenize the whole source up front; the pattern matching below needs
	// arbitrary lookahead plus one token of lookbehind.
	type tok struct {
		pos    int // byte offset of the token's first byte in src
		end    int // byte offset just past the token in src
		tok    token.Token
		lit    string
		offset int // byte offset in src — NOTE(review): never assigned below; pos already carries the offset
	}

	file := fset.AddFile("", fset.Base(), len(src))
	var s scanner.Scanner
	s.Init(file, src, nil, scanner.ScanComments)

	var toks []tok
	for {
		pos, t, lit := s.Scan()
		if t == token.EOF {
			break
		}
		offset := file.Offset(pos)
		end := offset + len(lit)
		if lit == "" {
			// Operator/delimiter tokens carry no lit; the token's
			// canonical spelling gives its byte width.
			end = offset + len(t.String())
		}
		toks = append(toks, tok{pos: offset, end: end, tok: t, lit: lit})
	}

	// Scan for pattern: CHAN typeTokens... LBRACE [expr] RBRACE
	// where typeTokens form a valid channel element type.
	var result bytes.Buffer
	lastEnd := 0 // offset in src up to which output has already been emitted

	for i := 0; i < len(toks); i++ {
		if toks[i].tok != token.CHAN {
			continue
		}

		// Found 'chan'. Now find the type expression and the '{'.
		// The type expression is everything between 'chan' and '{'.
		// It could be: int32, *Foo, []byte, <-chan int, etc.

		chanIdx := i
		braceIdx := -1

		// Find the opening brace. depth tracks paren/brace nesting so a
		// '{' inside a parenthesized subexpression is not mistaken for
		// the literal's opening brace.
		depth := 0
		for j := i + 1; j < len(toks); j++ {
			switch toks[j].tok {
			case token.LBRACE:
				if depth == 0 {
					braceIdx = j
				}
				depth++
			case token.RBRACE:
				depth--
			case token.LPAREN:
				depth++
			case token.RPAREN:
				depth--
			}
			if braceIdx >= 0 {
				break
			}
			// Stop if we hit something that can't be part of a type
			// expression — this 'chan' is a plain type, not a literal.
			// NOTE(review): the RPAREN check has no depth guard, so a
			// parenthesized element type (e.g. chan (T){}) is skipped
			// rather than rewritten — confirm that is acceptable.
			if toks[j].tok == token.SEMICOLON || toks[j].tok == token.ASSIGN ||
				toks[j].tok == token.DEFINE || toks[j].tok == token.COMMA ||
				toks[j].tok == token.RPAREN {
				break
			}
		}

		if braceIdx < 0 || braceIdx <= chanIdx+1 {
			continue // no brace found, or nothing between chan and {
		}

		// Check this is in expression context by whitelisting tokens that
		// can precede a channel literal. In type contexts (var/func/field
		// declarations), the { is a block/body opener, not a literal.
		inExprContext := false
		if chanIdx > 0 {
			prev := toks[chanIdx-1].tok
			switch prev {
			case token.ASSIGN, token.DEFINE, // x = chan T{}, x := chan T{}
				token.COLON,     // field: chan T{}
				token.COMMA,     // f(a, chan T{})
				token.LPAREN,    // f(chan T{})
				token.LBRACK,    // []chan T{}
				token.LBRACE,    // {chan T{}}
				token.RETURN,    // return chan T{}
				token.SEMICOLON: // ; chan T{} (incl. auto-inserted semicolons)
				inExprContext = true
			}
		} else {
			inExprContext = true // first token of the file
		}

		if !inExprContext {
			continue
		}

		// Find the matching closing brace of the literal body.
		closeIdx := -1
		depth = 1
		for j := braceIdx + 1; j < len(toks); j++ {
			switch toks[j].tok {
			case token.LBRACE:
				depth++
			case token.RBRACE:
				depth--
				if depth == 0 {
					closeIdx = j
				}
			}
			if closeIdx >= 0 {
				break
			}
		}

		if closeIdx < 0 {
			continue // unbalanced braces; leave the source untouched
		}

		// Extract the type expression text (between chan and {).
		typeStart := toks[chanIdx+1].pos
		typeEnd := toks[braceIdx].pos
		typeText := strings.TrimSpace(string(src[typeStart:typeEnd]))

		if typeText == "" {
			continue
		}

		// Handle chan struct{}{} and chan interface{}{}: the first {} is
		// part of the type, the second {} is the channel literal body.
		if typeText == "struct" || typeText == "interface" {
			// closeIdx points to the } that closes struct{}/interface{}.
			// Look for another {…} pair after it — that's the literal body.
			if closeIdx+1 >= len(toks) || toks[closeIdx+1].tok != token.LBRACE {
				continue // just "chan struct{}" in type context, no literal
			}
			// Include the struct{}/interface{} braces in the type text.
			typeText = typeText + "{}"
			braceIdx = closeIdx + 1
			// Find the matching close for the literal body.
			closeIdx = -1
			depth = 1
			for j := braceIdx + 1; j < len(toks); j++ {
				switch toks[j].tok {
				case token.LBRACE:
					depth++
				case token.RBRACE:
					depth--
					if depth == 0 {
						closeIdx = j
					}
				}
				if closeIdx >= 0 {
					break
				}
			}
			if closeIdx < 0 {
				continue
			}
		}

		// Extract the buffer size expression (between { and }), if any.
		var bufExpr string
		if closeIdx > braceIdx+1 {
			bufStart := toks[braceIdx+1].pos
			bufEnd := toks[closeIdx].pos
			bufExpr = strings.TrimSpace(string(src[bufStart:bufEnd]))
		}

		// Write everything before this channel literal.
		result.Write(src[lastEnd:toks[chanIdx].pos])

		// Write the replacement: (make)(chan T) or (make)(chan T, N).
		// Parenthesized (make) so the AST restriction check can tell
		// generated calls apart from user-written make() and skip them.
		result.WriteString("(make)(chan ")
		result.WriteString(typeText)
		if bufExpr != "" {
			result.WriteString(", ")
			result.WriteString(bufExpr)
		}
		result.WriteString(")")

		lastEnd = toks[closeIdx].end
		i = closeIdx // skip past the closing brace
	}

	if lastEnd == 0 {
		return src // no rewrites; hand back the original bytes
	}
	result.Write(src[lastEnd:])
	return result.Bytes()
}
265
266 // ---------------------------------------------------------------------------
267 // 1b. Slice size literal rewrite (text-level, before parsing)
268 // ---------------------------------------------------------------------------
269
// rewriteSliceLiterals scans source bytes for slice size literal syntax and
// rewrites it to make() calls that Go's parser accepts.
//
// Patterns:
//
//	[]T{:len}     → (make)([]T, len)
//	[]T{:len:cap} → (make)([]T, len, cap)
//
// The leading colon after { distinguishes this from regular composite literals
// ([]int{1, 2, 3} has no colon). The syntax mirrors Go's three-index slice
// expression a[low:high:max].
func rewriteSliceLiterals(src []byte, fset *token.FileSet) []byte {
	// Token record: start/end byte offsets in src plus kind and literal.
	type tok struct {
		pos int
		end int
		tok token.Token
		lit string
	}

	file := fset.AddFile("", fset.Base(), len(src))
	var s scanner.Scanner
	s.Init(file, src, nil, scanner.ScanComments)

	var toks []tok
	for {
		pos, t, lit := s.Scan()
		if t == token.EOF {
			break
		}
		offset := file.Offset(pos)
		end := offset + len(lit)
		if lit == "" {
			// Operators/delimiters carry no lit; width comes from the
			// token's canonical spelling.
			end = offset + len(t.String())
		}
		toks = append(toks, tok{pos: offset, end: end, tok: t, lit: lit})
	}

	var result bytes.Buffer
	lastEnd := 0 // offset in src up to which output has already been emitted

	for i := 0; i < len(toks); i++ {
		// Look for LBRACK RBRACK ... LBRACE COLON pattern.
		if toks[i].tok != token.LBRACK {
			continue
		}
		if i+1 >= len(toks) || toks[i+1].tok != token.RBRACK {
			continue
		}

		lbrackIdx := i

		// Scan forward past the element type to find LBRACE. depth tracks
		// bracket/paren nesting inside the element type (e.g. [][4]byte,
		// []map[string]int) so only a top-level '{' counts.
		braceIdx := -1
		depth := 0
		for j := i + 2; j < len(toks); j++ {
			switch toks[j].tok {
			case token.LBRACK:
				depth++
			case token.RBRACK:
				depth--
			case token.LPAREN:
				depth++
			case token.RPAREN:
				depth--
			case token.LBRACE:
				if depth == 0 {
					braceIdx = j
				}
			}
			if braceIdx >= 0 {
				break
			}
			// Stop at tokens that can't be part of a type expression.
			if depth == 0 && (toks[j].tok == token.SEMICOLON ||
				toks[j].tok == token.ASSIGN ||
				toks[j].tok == token.DEFINE ||
				toks[j].tok == token.COMMA) {
				break
			}
		}

		if braceIdx < 0 || braceIdx <= lbrackIdx+2 {
			continue // no brace, or nothing between [] and {
		}

		// Check that the token after { is COLON — this is the discriminator
		// between a size literal and an ordinary composite literal.
		if braceIdx+1 >= len(toks) || toks[braceIdx+1].tok != token.COLON {
			continue // regular composite literal, not slice size
		}

		// Find the closing brace, collecting colon positions for len:cap.
		// Track all bracket types so colons inside subscripts (e.g. buf[:2])
		// aren't mistaken for the len:cap separator.
		closeIdx := -1
		colonPositions := []int{braceIdx + 1} // first colon already found
		depth = 1
		bracketDepth := 0
		parenDepth := 0
		for j := braceIdx + 2; j < len(toks); j++ {
			switch toks[j].tok {
			case token.LBRACE:
				depth++
			case token.RBRACE:
				depth--
				if depth == 0 {
					closeIdx = j
				}
			case token.LBRACK:
				bracketDepth++
			case token.RBRACK:
				bracketDepth--
			case token.LPAREN:
				parenDepth++
			case token.RPAREN:
				parenDepth--
			case token.COLON:
				// Only a colon at the literal's top level separates
				// len from cap.
				if depth == 1 && bracketDepth == 0 && parenDepth == 0 {
					colonPositions = append(colonPositions, j)
				}
			}
			if closeIdx >= 0 {
				break
			}
		}

		if closeIdx < 0 {
			continue // unbalanced braces; leave untouched
		}

		// Extract the type text (from [ up to {, inclusive of the []).
		typeText := string(src[toks[lbrackIdx].pos:toks[braceIdx].pos])
		typeText = strings.TrimSpace(typeText)

		if len(colonPositions) == 1 {
			// []T{:len} → (make)([]T, len)
			lenStart := toks[colonPositions[0]+1].pos
			lenEnd := toks[closeIdx].pos
			lenExpr := strings.TrimSpace(string(src[lenStart:lenEnd]))
			if lenExpr == "" {
				continue // "{:}" with no length expression
			}

			result.Write(src[lastEnd:toks[lbrackIdx].pos])
			result.WriteString("(make)(")
			result.WriteString(typeText)
			result.WriteString(", ")
			result.WriteString(lenExpr)
			result.WriteString(")")
		} else if len(colonPositions) == 2 {
			// []T{:len:cap} → (make)([]T, len, cap)
			// len is between first colon and second colon
			lenStart := toks[colonPositions[0]+1].pos
			lenEnd := toks[colonPositions[1]].pos
			lenExpr := strings.TrimSpace(string(src[lenStart:lenEnd]))

			// cap is between second colon and closing brace
			capStart := toks[colonPositions[1]+1].pos
			capEnd := toks[closeIdx].pos
			capExpr := strings.TrimSpace(string(src[capStart:capEnd]))

			if lenExpr == "" || capExpr == "" {
				continue
			}

			result.Write(src[lastEnd:toks[lbrackIdx].pos])
			result.WriteString("(make)(")
			result.WriteString(typeText)
			result.WriteString(", ")
			result.WriteString(lenExpr)
			result.WriteString(", ")
			result.WriteString(capExpr)
			result.WriteString(")")
		} else {
			continue // malformed — more than 2 top-level colons
		}

		lastEnd = toks[closeIdx].end
		i = closeIdx // resume after the closing brace
	}

	if lastEnd == 0 {
		return src // no rewrites
	}
	result.Write(src[lastEnd:])
	return result.Bytes()
}
455
456 // ---------------------------------------------------------------------------
457 // 2. String literal rewrite (AST-level, after parsing, before typecheck)
458 // ---------------------------------------------------------------------------
459
460 // rewriteStringLiterals wraps string literals and string binary expressions
461 // in []byte() conversions throughout the AST of a user package.
462 //
463 // "hello" → []byte("hello")
464 // "a" + "b" → []byte("a" + "b")
465 //
466 // This makes Go's type checker see []byte instead of string for all text
467 // values in user code.
468 func rewriteStringLiterals(file *ast.File) {
469 // Walk the AST and replace string expressions with []byte() wrapped versions.
470 // We need to walk parent nodes to replace children in-place.
471 rewriteStringExprs(file)
472 }
473
// rewriteStringExprs walks the AST and wraps string-typed expressions in
// []byte(). Replacement happens through each parent node (AssignStmt,
// CallExpr, …) because ast.Inspect offers no way to swap the visited node
// itself.
func rewriteStringExprs(node ast.Node) {
	ast.Inspect(node, func(n ast.Node) bool {
		// Don't descend into []byte() wrappers we created — prevents
		// infinite recursion (walker would visit the inner string literal
		// and try to wrap it again).
		if expr, ok := n.(ast.Expr); ok && isSliceByteConversion(expr) {
			return false
		}
		// Convert string const declarations to var with []byte values.
		// Go const can only hold string; after mxpurify converts
		// fields/params to []byte, const strings can't be assigned.
		if gd, ok := n.(*ast.GenDecl); ok && gd.Tok == token.CONST {
			convertStringConstsToVars(gd)
			return false
		}
		// Skip function bodies that return string — these are interface-
		// mandated methods (Error(), String()) that cannot return []byte.
		if fd, ok := n.(*ast.FuncDecl); ok && funcReturnsString(fd) {
			return false
		}
		if fl, ok := n.(*ast.FuncLit); ok && funcTypeReturnsString(fl.Type) {
			return false
		}
		// For each parent kind that can hold a string-valued child, wrap
		// the relevant child slots in place.
		switch parent := n.(type) {
		case *ast.AssignStmt:
			for i, rhs := range parent.Rhs {
				if wrapped := wrapStringExpr(rhs); wrapped != nil {
					parent.Rhs[i] = wrapped
				}
			}
		case *ast.ValueSpec:
			for i, val := range parent.Values {
				if wrapped := wrapStringExpr(val); wrapped != nil {
					parent.Values[i] = wrapped
				}
			}
		case *ast.ReturnStmt:
			for i, result := range parent.Results {
				if wrapped := wrapStringExpr(result); wrapped != nil {
					parent.Results[i] = wrapped
				}
			}
		case *ast.CallExpr:
			// Skip wrapping args to calls on exempt packages
			// (e.g. os.Open("file") — os is exempt, expects string).
			if !isExemptPackageCall(parent) {
				for i, arg := range parent.Args {
					if wrapped := wrapStringExpr(arg); wrapped != nil {
						parent.Args[i] = wrapped
					}
				}
			}
		case *ast.SendStmt:
			if wrapped := wrapStringExpr(parent.Value); wrapped != nil {
				parent.Value = wrapped
			}
		case *ast.KeyValueExpr:
			if wrapped := wrapStringExpr(parent.Value); wrapped != nil {
				parent.Value = wrapped
			}
		case *ast.BinaryExpr:
			// Wrap string literals on either side of comparison operators.
			if wrapped := wrapStringExpr(parent.X); wrapped != nil {
				parent.X = wrapped
			}
			if wrapped := wrapStringExpr(parent.Y); wrapped != nil {
				parent.Y = wrapped
			}
		case *ast.CaseClause:
			// Wrap string literals in switch case values.
			for i, val := range parent.List {
				if wrapped := wrapStringExpr(val); wrapped != nil {
					parent.List[i] = wrapped
				}
			}
		case *ast.CompositeLit:
			for i, elt := range parent.Elts {
				// Skip KeyValueExpr — its Value slot is handled by the
				// KeyValueExpr case; keys must stay untouched.
				if _, isKV := elt.(*ast.KeyValueExpr); isKV {
					continue
				}
				if wrapped := wrapStringExpr(elt); wrapped != nil {
					parent.Elts[i] = wrapped
				}
			}
		case *ast.IndexExpr:
			if wrapped := wrapStringExpr(parent.Index); wrapped != nil {
				parent.Index = wrapped
			}
		case *ast.IfStmt:
			// Nothing to do directly: an if-init assignment and the
			// condition are visited as AssignStmt/BinaryExpr nodes.
		case *ast.SwitchStmt:
			// Wrap the switch tag if it's a string literal.
			if parent.Tag != nil {
				if wrapped := wrapStringExpr(parent.Tag); wrapped != nil {
					parent.Tag = wrapped
				}
			}
		}
		return true
	})
}
578
579 // wrapStringExpr returns a []byte(expr) wrapping if expr is a string-producing
580 // expression (string literal or binary + of string expressions). Returns nil
581 // if no wrapping is needed.
582 func wrapStringExpr(expr ast.Expr) ast.Expr {
583 if !isStringExpr(expr) {
584 return nil
585 }
586 // Already wrapped in []byte() — don't double-wrap.
587 if isSliceByteConversion(expr) {
588 return nil
589 }
590 return makeSliceByteCall(expr)
591 }
592
593 // isStringExpr returns true if the expression is syntactically a string literal
594 // or a binary + of string expressions (constant string concatenation).
595 func isStringExpr(expr ast.Expr) bool {
596 switch e := expr.(type) {
597 case *ast.BasicLit:
598 return e.Kind == token.STRING
599 case *ast.BinaryExpr:
600 if e.Op == token.ADD {
601 return isStringExpr(e.X) && isStringExpr(e.Y)
602 }
603 case *ast.ParenExpr:
604 return isStringExpr(e.X)
605 }
606 return false
607 }
608
609 // isSliceByteConversion returns true if expr is []byte(...).
610 func isSliceByteConversion(expr ast.Expr) bool {
611 call, ok := expr.(*ast.CallExpr)
612 if !ok || len(call.Args) != 1 {
613 return false
614 }
615 arr, ok := call.Fun.(*ast.ArrayType)
616 if !ok || arr.Len != nil {
617 return false
618 }
619 ident, ok := arr.Elt.(*ast.Ident)
620 return ok && ident.Name == "byte"
621 }
622
// convertStringConstsToVars converts pure string constants to var declarations
// with []byte values. Only converts specs where ALL values are string literals
// and there's no explicit type. Leaves explicitly-typed specs untouched.
//
// Mutates gd in place: the declaration token flips CONST→VAR, values of
// string-only specs are wrapped in []byte(), and the declared names'
// ast.Object kinds become Var so the type checker treats them as variables.
func convertStringConstsToVars(gd *ast.GenDecl) {
	// Pass 1: decide whether the block contains any convertible string
	// spec. A spec that is not a ValueSpec aborts the whole conversion.
	hasString := false
	for _, spec := range gd.Specs {
		vs, ok := spec.(*ast.ValueSpec)
		if !ok {
			return
		}
		// Has explicit type — leave as-is (could be int, byte, etc.)
		if vs.Type != nil {
			continue
		}
		for _, val := range vs.Values {
			if isStringExpr(val) {
				hasString = true
			}
		}
	}
	if !hasString {
		return
	}
	// Pass 2: convert string-only specs to var with []byte wrapping.
	// NOTE(review): the Tok flip applies to the WHOLE block, so non-string
	// specs ride along as vars too — var init from a literal is valid Go,
	// but those names lose const-ness for later passes; confirm intended.
	gd.Tok = token.VAR
	for _, spec := range gd.Specs {
		vs, ok := spec.(*ast.ValueSpec)
		if !ok {
			continue
		}
		allString := true
		for _, val := range vs.Values {
			if !isStringExpr(val) {
				allString = false
				break
			}
		}
		if !allString || vs.Type != nil {
			// Non-string spec in a now-var block. This is OK —
			// Go allows mixed const/var blocks, and var init from
			// literal is valid. Values stay unwrapped.
			continue
		}
		for i, val := range vs.Values {
			if wrapped := wrapStringExpr(val); wrapped != nil {
				vs.Values[i] = wrapped
			}
		}
		// Update the Object.Kind so the type checker sees these as
		// variables, not constants. The parser sets Kind=Con for const
		// declarations and the type checker uses this.
		for _, name := range vs.Names {
			if name.Obj != nil {
				name.Obj.Kind = ast.Var
			}
		}
	}
}
684
685 // funcReturnsString returns true if a FuncDecl has string in its return types.
686 // isExemptPackageCall returns true if a call expression targets a function
687 // from a package exempt from string rewrites (e.g. os.Open, errors.New before
688 // conversion). These calls expect string parameters, not []byte.
689 func isExemptPackageCall(call *ast.CallExpr) bool {
690 sel, ok := call.Fun.(*ast.SelectorExpr)
691 if !ok {
692 return false
693 }
694 ident, ok := sel.X.(*ast.Ident)
695 if !ok {
696 return false
697 }
698 return !isMoxieStringTarget(ident.Name)
699 }
700
// funcReturnsString reports whether fd's signature lists string among its
// result types (see funcTypeReturnsString for the matching rules).
func funcReturnsString(fd *ast.FuncDecl) bool {
	return funcTypeReturnsString(fd.Type)
}
704
705 // funcTypeReturnsString returns true if a FuncType has string in its return types.
706 func funcTypeReturnsString(ft *ast.FuncType) bool {
707 if ft.Results == nil {
708 return false
709 }
710 for _, field := range ft.Results.List {
711 if ident, ok := field.Type.(*ast.Ident); ok && ident.Name == "string" {
712 return true
713 }
714 }
715 return false
716 }
717
718 // makeSliceByteCall creates an AST node for []byte(expr).
719 func makeSliceByteCall(expr ast.Expr) *ast.CallExpr {
720 return &ast.CallExpr{
721 Fun: &ast.ArrayType{
722 Elt: &ast.Ident{Name: "byte"},
723 },
724 Args: []ast.Expr{expr},
725 }
726 }
727
728 // ---------------------------------------------------------------------------
729 // 2b. Builtin int→int32 wrapping (AST-level, after parsing, before typecheck)
730 // ---------------------------------------------------------------------------
731
// rewriteBuiltinIntReturns wraps len(), cap(), and copy() calls in int32()
// conversions. These builtins return int (from Go's universe), but Moxie
// uses int32 as the standard sized integer. Without this wrapping, mixing
// len() results with int32 values causes type checker errors.
//
//	len(x)        → int32(len(x))
//	cap(x)        → int32(cap(x))
//	copy(dst,src) → int32(copy(dst,src))
func rewriteBuiltinIntReturns(file *ast.File) {
	// Builtins whose return type is int.
	intBuiltins := map[string]bool{"len": true, "cap": true, "copy": true}

	ast.Inspect(file, func(n ast.Node) bool {
		// Skip const declarations — const expressions must stay untyped.
		if gd, ok := n.(*ast.GenDecl); ok && gd.Tok == token.CONST {
			return false
		}
		// Don't descend into int32() wrappers we just created — prevents
		// infinite recursion (walker would find len() inside and re-wrap).
		if call, ok := n.(*ast.CallExpr); ok {
			if ident, ok := call.Fun.(*ast.Ident); ok && ident.Name == "int32" {
				return false
			}
		}
		// Wrap each child slot that may hold a builtin call, mutating
		// through the parent node (ast.Inspect can't replace n itself).
		switch parent := n.(type) {
		case *ast.AssignStmt:
			for i, rhs := range parent.Rhs {
				if wrapped := wrapIntBuiltin(rhs, intBuiltins); wrapped != nil {
					parent.Rhs[i] = wrapped
				}
			}
		case *ast.ValueSpec:
			for i, val := range parent.Values {
				if wrapped := wrapIntBuiltin(val, intBuiltins); wrapped != nil {
					parent.Values[i] = wrapped
				}
			}
		case *ast.ReturnStmt:
			for i, result := range parent.Results {
				if wrapped := wrapIntBuiltin(result, intBuiltins); wrapped != nil {
					parent.Results[i] = wrapped
				}
			}
		case *ast.CallExpr:
			for i, arg := range parent.Args {
				if wrapped := wrapIntBuiltin(arg, intBuiltins); wrapped != nil {
					parent.Args[i] = wrapped
				}
			}
		case *ast.BinaryExpr:
			if wrapped := wrapIntBuiltin(parent.X, intBuiltins); wrapped != nil {
				parent.X = wrapped
			}
			if wrapped := wrapIntBuiltin(parent.Y, intBuiltins); wrapped != nil {
				parent.Y = wrapped
			}
		case *ast.IndexExpr:
			if wrapped := wrapIntBuiltin(parent.Index, intBuiltins); wrapped != nil {
				parent.Index = wrapped
			}
		case *ast.SendStmt:
			if wrapped := wrapIntBuiltin(parent.Value, intBuiltins); wrapped != nil {
				parent.Value = wrapped
			}
		case *ast.KeyValueExpr:
			if wrapped := wrapIntBuiltin(parent.Value, intBuiltins); wrapped != nil {
				parent.Value = wrapped
			}
		}
		return true
	})
}
804
805 // wrapIntBuiltin checks if expr is a call to a builtin that returns int,
806 // and if so, wraps it in int32(). Returns nil if no wrapping needed.
807 func wrapIntBuiltin(expr ast.Expr, builtins map[string]bool) ast.Expr {
808 call, ok := expr.(*ast.CallExpr)
809 if !ok {
810 return nil
811 }
812 ident, ok := call.Fun.(*ast.Ident)
813 if !ok || !builtins[ident.Name] {
814 return nil
815 }
816 // Already wrapped in int32() — don't double-wrap.
817 // (Check grandparent, but simpler: check if Fun is already int32.)
818 return &ast.CallExpr{
819 Fun: &ast.Ident{Name: "int32"},
820 Args: []ast.Expr{call},
821 }
822 }
823
824 // ---------------------------------------------------------------------------
825 // 3. Pipe concatenation rewrite (AST-level, after first typecheck pass)
826 // ---------------------------------------------------------------------------
827
// pipeRewrite records one | (or +) BinaryExpr scheduled to become a
// __moxie_concat call in applyPipeRewrites.
type pipeRewrite struct {
	parent ast.Node        // NOTE(review): never populated — findPipeConcat sets only expr; dead field?
	expr   *ast.BinaryExpr // the binary expression to replace
}
833
// findPipeConcat walks the AST and finds | and + expressions where operands
// are []byte, using type information from a completed typecheck pass.
// Catches both explicit | (pipe concat) and + (string concat that wasn't
// converted by mxpurify).
func findPipeConcat(files []*ast.File, info *types.Info) []pipeRewrite {
	var rewrites []pipeRewrite
	for _, file := range files {
		ast.Inspect(file, func(n ast.Node) bool {
			bin, ok := n.(*ast.BinaryExpr)
			if !ok || (bin.Op != token.OR && bin.Op != token.ADD) {
				return true
			}
			// Check if operands are []byte or string (string=[]byte
			// unification). Use both type info and syntactic detection
			// (the []byte(...) wrapper pattern from the string literal
			// rewrite), since type info may be incomplete after errors.
			xType := info.TypeOf(bin.X)
			yType := info.TypeOf(bin.Y)
			xOk := (xType != nil && isTextType(xType)) || isSliceByteConversion(bin.X) || isMoxieConcatCall(bin.X)
			yOk := (yType != nil && isTextType(yType)) || isSliceByteConversion(bin.Y) || isMoxieConcatCall(bin.Y)
			// Detect nested chains (a | b | c): if one side is confirmed
			// text and the other is a nested binary of | or +, assume the
			// nested side is text too — its own operands are checked when
			// the walk visits it.
			if bin.Op == token.OR || bin.Op == token.ADD {
				if !xOk && yOk {
					if inner, ok := bin.X.(*ast.BinaryExpr); ok && (inner.Op == token.OR || inner.Op == token.ADD) {
						xOk = true
					}
				}
				if !yOk && xOk {
					if inner, ok := bin.Y.(*ast.BinaryExpr); ok && (inner.Op == token.OR || inner.Op == token.ADD) {
						yOk = true
					}
				}
			}
			if xOk && yOk {
				// parent is left unset; applyPipeRewrites only needs expr.
				rewrites = append(rewrites, pipeRewrite{expr: bin})
			}
			return true
		})
	}
	return rewrites
}
877
878 // isByteSlice returns true if t is []byte (or []uint8).
879 func isByteSlice(t types.Type) bool {
880 sl, ok := t.Underlying().(*types.Slice)
881 if !ok {
882 return false
883 }
884 basic, ok := sl.Elem().(*types.Basic)
885 return ok && basic.Kind() == types.Byte
886 }
887
888 // isMoxieConcatCall returns true if the expression is a __moxie_concat call.
889 // Needed for chained + detection after earlier rewrites replaced inner + nodes.
890 func isMoxieConcatCall(e ast.Expr) bool {
891 call, ok := e.(*ast.CallExpr)
892 if !ok {
893 return false
894 }
895 ident, ok := call.Fun.(*ast.Ident)
896 return ok && ident.Name == "__moxie_concat"
897 }
898
899 // isTextType returns true if t is []byte or string (equivalent under Moxie's
900 // string=[]byte unification).
901 func isTextType(t types.Type) bool {
902 if isByteSlice(t) {
903 return true
904 }
905 basic, ok := t.Underlying().(*types.Basic)
906 return ok && basic.Info()&types.IsString != 0
907 }
908
909 // rewriteAddAssign converts `s += expr` to `s = __moxie_concat(s, expr)` for
910 // text types (string and []byte). The += operator doesn't produce a BinaryExpr
911 // in the AST, so findPipeConcat can't catch it. Returns number of rewrites.
912 func rewriteAddAssign(files []*ast.File, info *types.Info) int {
913 count := 0
914 for _, file := range files {
915 ast.Inspect(file, func(n ast.Node) bool {
916 assign, ok := n.(*ast.AssignStmt)
917 if !ok || assign.Tok != token.ADD_ASSIGN || len(assign.Lhs) != 1 {
918 return true
919 }
920 lhsType := info.TypeOf(assign.Lhs[0])
921 if lhsType == nil || !isTextType(lhsType) {
922 return true
923 }
924 // Rewrite: s += expr → s = __moxie_concat(s, expr)
925 assign.Tok = token.ASSIGN
926 assign.Rhs[0] = &ast.CallExpr{
927 Fun: &ast.Ident{Name: "__moxie_concat"},
928 Args: []ast.Expr{
929 assign.Lhs[0],
930 assign.Rhs[0],
931 },
932 }
933 count++
934 return true
935 })
936 }
937 return count
938 }
939
940 // applyPipeRewrites replaces | binary expressions with __moxie_concat calls.
941 // It walks the AST and replaces matching BinaryExpr nodes in-place.
942 func applyPipeRewrites(files []*ast.File, rewrites []pipeRewrite) {
943 // Build a set of expressions to rewrite.
944 rewriteSet := make(map[*ast.BinaryExpr]bool)
945 for _, r := range rewrites {
946 rewriteSet[r.expr] = true
947 }
948 if len(rewriteSet) == 0 {
949 return
950 }
951
952 // Walk AST and replace in parent nodes.
953 for _, file := range files {
954 replaceInNode(file, rewriteSet)
955 }
956 }
957
// replaceInNode walks a node and replaces any child expressions that are
// in the rewrite set with __moxie_concat(left, right) calls. Replacement is
// done through the parent node because ast.Inspect cannot swap the node it
// is visiting; each case below rewrites the expression slots that can hold
// a | / + chain.
func replaceInNode(node ast.Node, set map[*ast.BinaryExpr]bool) {
	ast.Inspect(node, func(n ast.Node) bool {
		switch parent := n.(type) {
		case *ast.AssignStmt:
			for i, rhs := range parent.Rhs {
				parent.Rhs[i] = maybeReplacePipe(rhs, set)
			}
		case *ast.ValueSpec:
			for i, val := range parent.Values {
				parent.Values[i] = maybeReplacePipe(val, set)
			}
		case *ast.ReturnStmt:
			for i, result := range parent.Results {
				parent.Results[i] = maybeReplacePipe(result, set)
			}
		case *ast.CallExpr:
			for i, arg := range parent.Args {
				parent.Args[i] = maybeReplacePipe(arg, set)
			}
		case *ast.SendStmt:
			parent.Value = maybeReplacePipe(parent.Value, set)
		case *ast.BinaryExpr:
			// Handles nested chains: the outer BinaryExpr's operands may
			// themselves be expressions in the set.
			parent.X = maybeReplacePipe(parent.X, set)
			parent.Y = maybeReplacePipe(parent.Y, set)
		case *ast.ParenExpr:
			parent.X = maybeReplacePipe(parent.X, set)
		case *ast.IndexExpr:
			parent.Index = maybeReplacePipe(parent.Index, set)
		case *ast.KeyValueExpr:
			parent.Value = maybeReplacePipe(parent.Value, set)
		case *ast.CompositeLit:
			for i, elt := range parent.Elts {
				parent.Elts[i] = maybeReplacePipe(elt, set)
			}
		}
		return true
	})
}
998
999 func maybeReplacePipe(expr ast.Expr, set map[*ast.BinaryExpr]bool) ast.Expr {
1000 bin, ok := expr.(*ast.BinaryExpr)
1001 if !ok || !set[bin] {
1002 return expr
1003 }
1004 // Replace: a | b → __moxie_concat(a, b)
1005 return &ast.CallExpr{
1006 Fun: &ast.Ident{Name: "__moxie_concat"},
1007 Args: []ast.Expr{bin.X, bin.Y},
1008 }
1009 }
1010
1011 // filterPipeErrors removes type errors about | on []byte from the error list.
1012 func filterPipeErrors(errs []error) []error {
1013 var filtered []error
1014 for _, err := range errs {
1015 msg := err.Error()
1016 // Go type checker error for | on slices looks like:
1017 // "invalid operation: ... (operator | not defined on ...)"
1018 if strings.Contains(msg, "operator |") && strings.Contains(msg, "[]") {
1019 continue
1020 }
1021 // Also filter "operator | not defined on untyped string" which can
1022 // happen when | is used between string literals before the string
1023 // rewrite converts them to []byte.
1024 if strings.Contains(msg, "operator |") && strings.Contains(msg, "string") {
1025 continue
1026 }
1027 // Filter "operator + not defined on []byte" — unconverted string concat.
1028 if strings.Contains(msg, "operator +") && strings.Contains(msg, "[]byte") {
1029 continue
1030 }
1031 // Filter mismatched types from + between []byte and string.
1032 if strings.Contains(msg, "mismatched types") && strings.Contains(msg, "operator +") {
1033 continue
1034 }
1035 filtered = append(filtered, err)
1036 }
1037 return filtered
1038 }
1039
1040 // ---------------------------------------------------------------------------
1041 // 4. Byte slice comparison rewrite (AST-level, after first typecheck pass)
1042 // ---------------------------------------------------------------------------
1043 //
1044 // Moxie uses []byte as its text type. Go's type checker doesn't allow
1045 // ==, !=, <, <=, >, >= on slices. This rewrite converts []byte comparisons
1046 // to __moxie_eq / __moxie_lt calls, and converts switch statements on []byte
1047 // to tag-less switches with __moxie_eq calls.
1048
1049 // findByteComparisons finds binary expressions comparing two []byte values.
1050 func findByteComparisons(files []*ast.File, info *types.Info) []*ast.BinaryExpr {
1051 var result []*ast.BinaryExpr
1052 for _, file := range files {
1053 ast.Inspect(file, func(n ast.Node) bool {
1054 bin, ok := n.(*ast.BinaryExpr)
1055 if !ok {
1056 return true
1057 }
1058 switch bin.Op {
1059 case token.EQL, token.NEQ, token.LSS, token.LEQ, token.GTR, token.GEQ:
1060 default:
1061 return true
1062 }
1063 xType := info.TypeOf(bin.X)
1064 yType := info.TypeOf(bin.Y)
1065 if xType == nil || yType == nil {
1066 return true
1067 }
1068 if isByteSlice(xType) || isByteSlice(yType) {
1069 result = append(result, bin)
1070 }
1071 return true
1072 })
1073 }
1074 return result
1075 }
1076
1077 // applyByteComparisonRewrites replaces []byte comparison expressions with
1078 // __moxie_eq / __moxie_lt function calls.
1079 func applyByteComparisonRewrites(files []*ast.File, exprs []*ast.BinaryExpr) {
1080 set := make(map[*ast.BinaryExpr]bool)
1081 for _, e := range exprs {
1082 set[e] = true
1083 }
1084 if len(set) == 0 {
1085 return
1086 }
1087 for _, file := range files {
1088 replaceComparisons(file, set)
1089 }
1090 }
1091
1092 func replaceComparisons(node ast.Node, set map[*ast.BinaryExpr]bool) {
1093 ast.Inspect(node, func(n ast.Node) bool {
1094 switch parent := n.(type) {
1095 case *ast.AssignStmt:
1096 for i, rhs := range parent.Rhs {
1097 parent.Rhs[i] = maybeReplaceCmp(rhs, set)
1098 }
1099 case *ast.ValueSpec:
1100 for i, val := range parent.Values {
1101 parent.Values[i] = maybeReplaceCmp(val, set)
1102 }
1103 case *ast.ReturnStmt:
1104 for i, result := range parent.Results {
1105 parent.Results[i] = maybeReplaceCmp(result, set)
1106 }
1107 case *ast.CallExpr:
1108 for i, arg := range parent.Args {
1109 parent.Args[i] = maybeReplaceCmp(arg, set)
1110 }
1111 case *ast.IfStmt:
1112 parent.Cond = maybeReplaceCmp(parent.Cond, set)
1113 case *ast.ForStmt:
1114 if parent.Cond != nil {
1115 parent.Cond = maybeReplaceCmp(parent.Cond, set)
1116 }
1117 case *ast.BinaryExpr:
1118 // Handle nested: (a == b) && (c == d)
1119 parent.X = maybeReplaceCmp(parent.X, set)
1120 parent.Y = maybeReplaceCmp(parent.Y, set)
1121 case *ast.UnaryExpr:
1122 parent.X = maybeReplaceCmp(parent.X, set)
1123 case *ast.ParenExpr:
1124 parent.X = maybeReplaceCmp(parent.X, set)
1125 case *ast.CaseClause:
1126 for i, val := range parent.List {
1127 parent.List[i] = maybeReplaceCmp(val, set)
1128 }
1129 case *ast.SendStmt:
1130 parent.Value = maybeReplaceCmp(parent.Value, set)
1131 case *ast.CompositeLit:
1132 for i, elt := range parent.Elts {
1133 parent.Elts[i] = maybeReplaceCmp(elt, set)
1134 }
1135 }
1136 return true
1137 })
1138 }
1139
1140 func maybeReplaceCmp(expr ast.Expr, set map[*ast.BinaryExpr]bool) ast.Expr {
1141 bin, ok := expr.(*ast.BinaryExpr)
1142 if !ok || !set[bin] {
1143 return expr
1144 }
1145 switch bin.Op {
1146 case token.EQL:
1147 // a == b → __moxie_eq(a, b)
1148 return &ast.CallExpr{
1149 Fun: &ast.Ident{Name: "__moxie_eq"},
1150 Args: []ast.Expr{bin.X, bin.Y},
1151 }
1152 case token.NEQ:
1153 // a != b → !__moxie_eq(a, b)
1154 return &ast.UnaryExpr{
1155 Op: token.NOT,
1156 X: &ast.CallExpr{
1157 Fun: &ast.Ident{Name: "__moxie_eq"},
1158 Args: []ast.Expr{bin.X, bin.Y},
1159 },
1160 }
1161 case token.LSS:
1162 // a < b → __moxie_lt(a, b)
1163 return &ast.CallExpr{
1164 Fun: &ast.Ident{Name: "__moxie_lt"},
1165 Args: []ast.Expr{bin.X, bin.Y},
1166 }
1167 case token.LEQ:
1168 // a <= b → !__moxie_lt(b, a)
1169 return &ast.UnaryExpr{
1170 Op: token.NOT,
1171 X: &ast.CallExpr{
1172 Fun: &ast.Ident{Name: "__moxie_lt"},
1173 Args: []ast.Expr{bin.Y, bin.X},
1174 },
1175 }
1176 case token.GTR:
1177 // a > b → __moxie_lt(b, a)
1178 return &ast.CallExpr{
1179 Fun: &ast.Ident{Name: "__moxie_lt"},
1180 Args: []ast.Expr{bin.Y, bin.X},
1181 }
1182 case token.GEQ:
1183 // a >= b → !__moxie_lt(a, b)
1184 return &ast.UnaryExpr{
1185 Op: token.NOT,
1186 X: &ast.CallExpr{
1187 Fun: &ast.Ident{Name: "__moxie_lt"},
1188 Args: []ast.Expr{bin.X, bin.Y},
1189 },
1190 }
1191 }
1192 return expr
1193 }
1194
1195 // findByteSwitches finds switch statements that switch on a []byte expression.
1196 func findByteSwitches(files []*ast.File, info *types.Info) []*ast.SwitchStmt {
1197 var result []*ast.SwitchStmt
1198 for _, file := range files {
1199 ast.Inspect(file, func(n ast.Node) bool {
1200 sw, ok := n.(*ast.SwitchStmt)
1201 if !ok || sw.Tag == nil {
1202 return true
1203 }
1204 tagType := info.TypeOf(sw.Tag)
1205 if tagType != nil && isByteSlice(tagType) {
1206 result = append(result, sw)
1207 }
1208 return true
1209 })
1210 }
1211 return result
1212 }
1213
1214 // applyByteSwitchRewrites converts switch statements on []byte to tag-less
1215 // switches with __moxie_eq calls.
1216 //
1217 // switch x { case "a": ... } → switch { case __moxie_eq(x, []byte("a")): ... }
1218 func applyByteSwitchRewrites(switches []*ast.SwitchStmt) {
1219 for _, sw := range switches {
1220 tag := sw.Tag
1221 sw.Tag = nil // make it a tag-less switch
1222 for _, stmt := range sw.Body.List {
1223 cc, ok := stmt.(*ast.CaseClause)
1224 if !ok || cc.List == nil {
1225 continue // default clause
1226 }
1227 for i, val := range cc.List {
1228 cc.List[i] = &ast.CallExpr{
1229 Fun: &ast.Ident{Name: "__moxie_eq"},
1230 Args: []ast.Expr{tag, val},
1231 }
1232 }
1233 }
1234 }
1235 }
1236
1237 // filterByteCompareErrors removes type errors about []byte comparison.
1238 func filterByteCompareErrors(errs []error) []error {
1239 var filtered []error
1240 for _, err := range errs {
1241 msg := err.Error()
1242 if strings.Contains(msg, "slice can only be compared to nil") {
1243 continue
1244 }
1245 if strings.Contains(msg, "mismatched types []byte and untyped string") {
1246 continue
1247 }
1248 if strings.Contains(msg, "cannot convert") && strings.Contains(msg, "untyped string") && strings.Contains(msg, "[]byte") {
1249 continue
1250 }
1251 // "invalid case" errors from switch on []byte
1252 if strings.Contains(msg, "invalid case") && strings.Contains(msg, "[]byte") {
1253 continue
1254 }
1255 filtered = append(filtered, err)
1256 }
1257 return filtered
1258 }
1259
1260 // filterStringByteMismatch removes type errors about string/[]byte mismatches.
1261 // In moxie, string and []byte are the same type, so these errors are spurious.
1262 func filterStringByteMismatch(errs []error) []error {
1263 var filtered []error
1264 for _, err := range errs {
1265 msg := err.Error()
1266 if strings.Contains(msg, "[]byte") && strings.Contains(msg, "string") &&
1267 (strings.Contains(msg, "cannot use") || strings.Contains(msg, "cannot convert") ||
1268 strings.Contains(msg, "mismatched types") || strings.Contains(msg, "does not satisfy")) {
1269 continue
1270 }
1271 filtered = append(filtered, err)
1272 }
1273 return filtered
1274 }
1275