main.go raw
// mxpurify rewrites .mx source files from Go syntax to Moxie syntax.
//
// Transformations:
//   - string type → []byte (map keys, interface bodies and Error()/String()
//     methods keep string; disable with -no-string)
//   - int type → int32 (not int8/16/64) — planned, not applied yet
//   - uint type → uint32 (not uint8/16/64) — planned, not applied yet
//   - make(chan T) → chan T{}, make(chan T, n) → chan T{n}
//   - make(map[K]V) → map[K]V{}, make(map[K]V, n) → map[K]V{} (capacity hint is dropped)
//   - make([]T, len) → []T{:len}, make([]T, len, cap) → []T{:len:cap}
//   - new(T) → &T{}
//
// Usage:
//
//	mxpurify [-dry-run] [-v] [-no-string] file.mx [file2.mx ...]
//	mxpurify [-dry-run] [-v] [-no-string] -dir /path/to/package
14 package main
15
16 import (
17 "flag"
18 "fmt"
19 "go/ast"
20 "go/parser"
21 "go/printer"
22 "go/scanner"
23 "go/token"
24 "os"
25 "path/filepath"
26 "strings"
27 )
28
// Command-line flags. All of them are registered on the default flag set
// and become valid after flag.Parse has run in main.
var (
	// dryRun reports the number of changes per file without writing anything.
	dryRun = flag.Bool("dry-run", false, "print changes without writing")
	// verbose additionally prints a line for every processed file.
	verbose = flag.Bool("v", false, "verbose output")
	// dir, when non-empty, names a directory whose .mx files are processed.
	dir = flag.String("dir", "", "rewrite all .mx files in directory")
	// noString turns off the string→[]byte type rewrite.
	noString = flag.Bool("no-string", false, "skip string→[]byte conversion (only do make/new)")
)
35
36 func main() {
37 flag.Parse()
38
39 var files []string
40 if *dir != "" {
41 entries, err := os.ReadDir(*dir)
42 if err != nil {
43 fmt.Fprintf(os.Stderr, "error: %v\n", err)
44 os.Exit(1)
45 }
46 for _, e := range entries {
47 if !e.IsDir() && strings.HasSuffix(e.Name(), ".mx") && !strings.HasSuffix(e.Name(), "_test.mx") {
48 files = append(files, filepath.Join(*dir, e.Name()))
49 }
50 }
51 }
52 files = append(files, flag.Args()...)
53
54 if len(files) == 0 {
55 fmt.Fprintln(os.Stderr, "usage: mxpurify [-dry-run] [-v] [-dir DIR] [file.mx ...]")
56 os.Exit(1)
57 }
58
59 totalChanges := 0
60 for _, f := range files {
61 n, err := rewriteFile(f)
62 if err != nil {
63 fmt.Fprintf(os.Stderr, "%s: %v\n", f, err)
64 continue
65 }
66 totalChanges += n
67 }
68 fmt.Printf("total: %d changes across %d files\n", totalChanges, len(files))
69 }
70
71 func rewriteFile(path string) (int, error) {
72 fset := token.NewFileSet()
73 // Parse as .go since .mx is syntactically Go.
74 file, err := parser.ParseFile(fset, path, nil, parser.ParseComments)
75 if err != nil {
76 return 0, fmt.Errorf("parse: %w", err)
77 }
78
79 changes := 0
80
81 // Walk AST and apply transformations.
82 changes += rewriteTypes(file)
83 changes += rewriteMakeCalls(file, fset)
84 changes += rewriteNewCalls(file)
85
86 // Write AST to buffer via go/printer.
87 var buf strings.Builder
88 cfg := &printer.Config{Mode: printer.UseSpaces | printer.TabIndent, Tabwidth: 8}
89 if err := cfg.Fprint(&buf, fset, file); err != nil {
90 return 0, fmt.Errorf("print: %w", err)
91 }
92
93 // Text-level pass: make([]T, len) → []T{:len}, make([]T, len, cap) → []T{:len:cap}
94 output := buf.String()
95 output, sliceChanges := rewriteSliceMakes(output)
96 changes += sliceChanges
97
98 if changes == 0 {
99 if *verbose {
100 fmt.Printf("%s: no changes\n", path)
101 }
102 return 0, nil
103 }
104
105 if *dryRun {
106 fmt.Printf("%s: %d changes (dry run)\n", path, changes)
107 return changes, nil
108 }
109
110 if err := os.WriteFile(path, []byte(output), 0644); err != nil {
111 return 0, err
112 }
113
114 if *verbose {
115 fmt.Printf("%s: %d changes\n", path, changes)
116 }
117 return changes, nil
118 }
119
120 // rewriteTypes replaces type identifiers: string→[]byte.
121 // int/uint conversion is deferred until the compiler can alias int=int32.
122 func rewriteTypes(file *ast.File) int {
123 if *noString {
124 return 0
125 }
126 // Collect interface body positions to skip type constraints.
127 ifaceRanges := collectInterfaceRanges(file)
128
129 changes := 0
130 ast.Inspect(file, func(n ast.Node) bool {
131 ident, ok := n.(*ast.Ident)
132 if !ok {
133 return true
134 }
135 // Skip idents inside interface type declarations (type constraints
136 // like ~int | ~string). These can't be mechanically rewritten
137 // because ~[]byte is invalid Go and ~int32 creates duplicates.
138 if isInRanges(ident.Pos(), ifaceRanges) {
139 return true
140 }
141
142 // Note: int/uint conversion is deferred until the compiler can
143 // alias int=int32 in the type checker. len()/cap()/copy() return
144 // int, so converting int→int32 creates pervasive type mismatches.
145 return true
146 })
147
148 changes += rewriteStringType(file, ifaceRanges)
149 return changes
150 }
151
// posRange is a span of source positions; isInRanges treats both ends as
// inclusive.
type posRange struct {
	start token.Pos
	end   token.Pos
}
153
154 func collectInterfaceRanges(file *ast.File) []posRange {
155 var ranges []posRange
156 ast.Inspect(file, func(n ast.Node) bool {
157 iface, ok := n.(*ast.InterfaceType)
158 if ok && iface.Methods != nil {
159 ranges = append(ranges, posRange{
160 start: iface.Methods.Opening,
161 end: iface.Methods.Closing,
162 })
163 }
164 return true
165 })
166 return ranges
167 }
168
169 func isInRanges(pos token.Pos, ranges []posRange) bool {
170 for _, r := range ranges {
171 if pos >= r.start && pos <= r.end {
172 return true
173 }
174 }
175 return false
176 }
177
// collectExemptStringPositions returns the positions of `string` identifiers
// that must not be rewritten.
//
// Only methods (declarations with a receiver) named Error or String that
// take no parameters and have exactly one result are considered. For those,
// the result type is exempted when it is the identifier string, and so is
// every string identifier inside the method body, so that string()
// conversions needed to produce the string result are preserved.
func collectExemptStringPositions(file *ast.File) map[token.Pos]bool {
	exempt := map[token.Pos]bool{}

	// mark records n when it is a `string` identifier. It always returns
	// true so it can double as an ast.Inspect callback.
	mark := func(n ast.Node) bool {
		if id, isIdent := n.(*ast.Ident); isIdent && id.Name == "string" {
			exempt[id.NamePos] = true
		}
		return true
	}

	for _, decl := range file.Decls {
		fn, isFunc := decl.(*ast.FuncDecl)
		if !isFunc || fn.Recv == nil {
			continue
		}
		if fn.Name.Name != "Error" && fn.Name.Name != "String" {
			continue
		}
		if params := fn.Type.Params; params != nil && len(params.List) > 0 {
			continue
		}
		results := fn.Type.Results
		if results == nil || len(results.List) != 1 {
			continue
		}

		mark(results.List[0].Type)
		if fn.Body != nil {
			ast.Inspect(fn.Body, mark)
		}
	}
	return exempt
}
215
216 // isTypePosition heuristically checks if an ident is used as a type (not a
217 // conversion call, not part of a longer name, etc.).
218 // This uses parent-node analysis to distinguish type positions.
219 func isTypePosition(ident *ast.Ident, file *ast.File) bool {
220 // Walk the AST to find the parent of this ident.
221 parent := findParent(file, ident)
222 if parent == nil {
223 return false
224 }
225
226 switch p := parent.(type) {
227 case *ast.Field:
228 // Function parameters, struct fields, return types.
229 return p.Type == ident
230 case *ast.ValueSpec:
231 // var x int
232 return p.Type == ident
233 case *ast.ArrayType:
234 // []int, [N]int
235 return p.Elt == ident
236 case *ast.MapType:
237 // map[int]V or map[K]int
238 return p.Key == ident || p.Value == ident
239 case *ast.ChanType:
240 // chan int
241 return p.Value == ident
242 case *ast.StarExpr:
243 // *int
244 return p.X == ident
245 case *ast.TypeSpec:
246 // type X int
247 return p.Type == ident
248 case *ast.CompositeLit:
249 // T{...} — the type of a composite literal
250 return p.Type == ident
251 case *ast.FuncType:
252 // standalone type usage in func type
253 return false
254 case *ast.UnaryExpr:
255 // ~int in type constraints
256 if p.Op == token.TILDE {
257 return true
258 }
259 case *ast.CallExpr:
260 // int(x) — type conversion. We want to change this too.
261 return p.Fun == ident
262 case *ast.BinaryExpr:
263 // int | uint in type constraints
264 return true
265 case *ast.InterfaceType:
266 return false
267 case *ast.Ellipsis:
268 // ...int (variadic)
269 return p.Elt == ident
270 case *ast.CaseClause:
271 // case int: in type switch
272 for _, e := range p.List {
273 if e == ident {
274 return true
275 }
276 }
277 case *ast.TypeAssertExpr:
278 // x.(int)
279 return p.Type == ident
280 }
281 return false
282 }
283
// findParent returns the immediate parent node of target within the tree
// rooted at root, or nil when no node of the tree has target as a direct
// child (which includes the case target == root).
func findParent(root ast.Node, target ast.Node) ast.Node {
	// hasDirectChild reports whether target is one of n's direct children.
	// The walk enters n itself and then refuses to descend any further.
	hasDirectChild := func(n ast.Node) bool {
		hit := false
		ast.Inspect(n, func(c ast.Node) bool {
			switch {
			case c == n:
				return true // step into n to reach its children
			case c == target:
				hit = true
			}
			return false // never go below the first level
		})
		return hit
	}

	var result ast.Node
	ast.Inspect(root, func(n ast.Node) bool {
		if n == nil || result != nil {
			return false
		}
		if hasDirectChild(n) {
			result = n
			return false
		}
		return true
	})
	return result
}
311
312 // rewriteStringType replaces string idents with []byte in type positions.
313 // This requires parent-node replacement since we change node type (Ident → ArrayType).
314 func rewriteStringType(file *ast.File, ifaceRanges []posRange) int {
315 // Collect positions of return type fields in Error()/String() methods.
316 // These must stay string to satisfy Go's built-in interfaces.
317 exemptPositions := collectExemptStringPositions(file)
318
319 changes := 0
320 byteSlice := func(pos token.Pos) *ast.ArrayType {
321 return &ast.ArrayType{
322 Lbrack: pos,
323 Elt: &ast.Ident{Name: "byte", NamePos: pos},
324 }
325 }
326
327 // Walk fields (function params, returns, struct fields).
328 ast.Inspect(file, func(n ast.Node) bool {
329 // Skip nodes inside interface bodies (type constraints).
330 if n != nil {
331 if pos := n.Pos(); pos.IsValid() && isInRanges(pos, ifaceRanges) {
332 return false
333 }
334 }
335 switch node := n.(type) {
336 case *ast.Field:
337 if ident, ok := node.Type.(*ast.Ident); ok && ident.Name == "string" {
338 if exemptPositions[ident.NamePos] {
339 return true // Keep string for Error()/String() returns
340 }
341 node.Type = byteSlice(ident.NamePos)
342 changes++
343 }
344 case *ast.ValueSpec:
345 if ident, ok := node.Type.(*ast.Ident); ok && ident.Name == "string" && !exemptPositions[ident.NamePos] {
346 node.Type = byteSlice(ident.NamePos)
347 changes++
348 }
349 case *ast.ArrayType:
350 if ident, ok := node.Elt.(*ast.Ident); ok && ident.Name == "string" && !exemptPositions[ident.NamePos] {
351 node.Elt = byteSlice(ident.NamePos)
352 changes++
353 }
354 case *ast.MapType:
355 // Don't change map keys from string — maps need string keys internally.
356 // Only change map values.
357 if ident, ok := node.Value.(*ast.Ident); ok && ident.Name == "string" && !exemptPositions[ident.NamePos] {
358 node.Value = byteSlice(ident.NamePos)
359 changes++
360 }
361 case *ast.ChanType:
362 if ident, ok := node.Value.(*ast.Ident); ok && ident.Name == "string" && !exemptPositions[ident.NamePos] {
363 node.Value = byteSlice(ident.NamePos)
364 changes++
365 }
366 case *ast.StarExpr:
367 if ident, ok := node.X.(*ast.Ident); ok && ident.Name == "string" && !exemptPositions[ident.NamePos] {
368 node.X = byteSlice(ident.NamePos)
369 changes++
370 }
371 case *ast.TypeSpec:
372 if ident, ok := node.Type.(*ast.Ident); ok && ident.Name == "string" && !exemptPositions[ident.NamePos] {
373 node.Type = byteSlice(ident.NamePos)
374 changes++
375 }
376 case *ast.CallExpr:
377 // string(x) → []byte(x) — type conversion (skip inside Error/String methods)
378 if ident, ok := node.Fun.(*ast.Ident); ok && ident.Name == "string" && !exemptPositions[ident.NamePos] {
379 node.Fun = byteSlice(ident.NamePos)
380 changes++
381 }
382 case *ast.Ellipsis:
383 // ...string (variadic params)
384 if ident, ok := node.Elt.(*ast.Ident); ok && ident.Name == "string" && !exemptPositions[ident.NamePos] {
385 node.Elt = byteSlice(ident.NamePos)
386 changes++
387 }
388 case *ast.CaseClause:
389 // case string: (type switches)
390 for i, expr := range node.List {
391 if ident, ok := expr.(*ast.Ident); ok && ident.Name == "string" && !exemptPositions[ident.NamePos] {
392 node.List[i] = byteSlice(ident.NamePos)
393 changes++
394 }
395 }
396 case *ast.TypeAssertExpr:
397 // x.(string) → x.([]byte)
398 if ident, ok := node.Type.(*ast.Ident); ok && ident.Name == "string" && !exemptPositions[ident.NamePos] {
399 node.Type = byteSlice(ident.NamePos)
400 changes++
401 }
402 }
403 return true
404 })
405 return changes
406 }
407
408 // rewriteMakeCalls rewrites make() calls to Moxie literal syntax.
409 func rewriteMakeCalls(file *ast.File, fset *token.FileSet) int {
410 changes := 0
411 rewriteInExprs(file, func(expr ast.Expr) ast.Expr {
412 call, ok := expr.(*ast.CallExpr)
413 if !ok {
414 return nil
415 }
416 ident, ok := call.Fun.(*ast.Ident)
417 if !ok || ident.Name != "make" {
418 return nil
419 }
420 if len(call.Args) < 1 {
421 return nil
422 }
423
424 typeArg := call.Args[0]
425
426 switch t := typeArg.(type) {
427 case *ast.ChanType:
428 // make(chan T) → chan T{}, make(chan T, n) → chan T{n}
429 changes++
430 if len(call.Args) >= 2 {
431 return &ast.CompositeLit{
432 Type: t,
433 Elts: []ast.Expr{call.Args[1]},
434 }
435 }
436 return &ast.CompositeLit{Type: t}
437
438 case *ast.MapType:
439 // make(map[K]V) → map[K]V{}, make(map[K]V, n) → map[K]V{}
440 // Drop capacity hint.
441 changes++
442 return &ast.CompositeLit{Type: t}
443
444 case *ast.ArrayType:
445 if t.Len != nil {
446 return nil // [N]T, not a slice
447 }
448 // make([]T, len) or make([]T, len, cap)
449 // We can't represent []T{:len} in the Go AST since it's not valid Go.
450 // Instead, keep the make() call but print a TODO comment.
451 // The text-level rewrite in the compiler handles the Moxie syntax.
452 //
453 // For now, leave slice makes as-is — they'll be rewritten
454 // by the compiler's text-level pass when we convert them to
455 // []T{:len} syntax manually or in a second pass.
456 return nil
457 }
458 return nil
459 })
460 return changes
461 }
462
// rewriteSliceMakes converts slice-allocating make calls in src to Moxie
// slice-literal syntax and returns the rewritten text together with the
// number of calls converted:
//
//	make([]T, len)      → []T{:len}
//	make([]T, len, cap) → []T{:len:cap}
//
// Matching is done on go/scanner tokens, so text inside string literals and
// comments is never touched. Beyond the basic pattern the function
//   - ignores selector calls such as x.make([]T, n), which are not the
//     builtin,
//   - tolerates a trailing comma directly before the closing parenthesis,
//   - rewrites slice makes nested inside the len/cap arguments.
//
// When nothing matches, src is returned unchanged with a count of 0.
func rewriteSliceMakes(src string) (string, int) {
	fset := token.NewFileSet()
	srcBytes := []byte(src)
	file := fset.AddFile("", fset.Base(), len(srcBytes))
	var s scanner.Scanner
	s.Init(file, srcBytes, nil, scanner.ScanComments)

	// tok is a scanned token with its byte offsets in src.
	type tok struct {
		pos, end int
		tok      token.Token
		lit      string
	}
	var toks []tok
	for {
		pos, t, lit := s.Scan()
		if t == token.EOF {
			break
		}
		offset := file.Offset(pos)
		end := offset + len(lit)
		if lit == "" {
			// Operators and delimiters carry no literal text.
			end = offset + len(t.String())
		}
		toks = append(toks, tok{pos: offset, end: end, tok: t, lit: lit})
	}

	// arg returns the trimmed argument text src[from:to] with any slice
	// makes it contains rewritten, plus the number of nested rewrites.
	arg := func(from, to int) (string, int) {
		return rewriteSliceMakes(strings.TrimSpace(src[from:to]))
	}

	var out strings.Builder
	lastEnd, changes := 0, 0

	for i := 0; i < len(toks); i++ {
		// Look for: make LPAREN LBRACK RBRACK ...
		if toks[i].tok != token.IDENT || toks[i].lit != "make" {
			continue
		}
		if i > 0 && toks[i-1].tok == token.PERIOD {
			continue // x.make(...) is a method or field, not the builtin
		}
		if i+3 >= len(toks) ||
			toks[i+1].tok != token.LPAREN ||
			toks[i+2].tok != token.LBRACK ||
			toks[i+3].tok != token.RBRACK {
			continue
		}

		// Find the parenthesis that closes the make call.
		closeIdx := -1
		for j, depth := i+2, 1; j < len(toks) && closeIdx < 0; j++ {
			switch toks[j].tok {
			case token.LPAREN:
				depth++
			case token.RPAREN:
				depth--
				if depth == 0 {
					closeIdx = j
				}
			}
		}
		if closeIdx < 0 {
			continue
		}

		// Collect the commas that separate the call's own arguments,
		// i.e. those not nested inside any bracket pair.
		var commas []int
		depth := 0
		for j := i + 2; j < closeIdx; j++ {
			switch toks[j].tok {
			case token.LPAREN, token.LBRACK, token.LBRACE:
				depth++
			case token.RPAREN, token.RBRACK, token.RBRACE:
				depth--
			case token.COMMA:
				if depth == 0 {
					commas = append(commas, j)
				}
			}
		}

		// A comma directly before ')' is a trailing comma, not a separator.
		argsEnd := toks[closeIdx].pos
		if n := len(commas); n > 0 && commas[n-1] == closeIdx-1 {
			argsEnd = toks[commas[n-1]].pos
			commas = commas[:n-1]
		}
		if len(commas) < 1 || len(commas) > 2 {
			continue // not make([]T, len) or make([]T, len, cap)
		}

		// The slice type runs from '[' up to the first separator.
		typeText := strings.TrimSpace(src[toks[i+2].pos:toks[commas[0]].pos])

		out.WriteString(src[lastEnd:toks[i].pos])
		out.WriteString(typeText)
		out.WriteString("{:")
		if len(commas) == 1 {
			lenText, nested := arg(toks[commas[0]].end, argsEnd)
			out.WriteString(lenText)
			changes += nested
		} else {
			lenText, nestedLen := arg(toks[commas[0]].end, toks[commas[1]].pos)
			capText, nestedCap := arg(toks[commas[1]].end, argsEnd)
			out.WriteString(lenText)
			out.WriteString(":")
			out.WriteString(capText)
			changes += nestedLen + nestedCap
		}
		out.WriteString("}")
		changes++

		// Resume after the call; its interior has been handled above.
		lastEnd = toks[closeIdx].end
		i = closeIdx
	}

	if changes == 0 {
		return src, 0
	}
	out.WriteString(src[lastEnd:])
	return out.String(), changes
}
598
599 // rewriteNewCalls rewrites new(T) to &T{}.
600 func rewriteNewCalls(file *ast.File) int {
601 changes := 0
602 rewriteInExprs(file, func(expr ast.Expr) ast.Expr {
603 call, ok := expr.(*ast.CallExpr)
604 if !ok {
605 return nil
606 }
607 ident, ok := call.Fun.(*ast.Ident)
608 if !ok || ident.Name != "new" || len(call.Args) != 1 {
609 return nil
610 }
611 changes++
612 return &ast.UnaryExpr{
613 Op: token.AND,
614 X: &ast.CompositeLit{
615 Type: call.Args[0],
616 },
617 }
618 })
619 return changes
620 }
621
// rewriteInExprs walks the AST of file and offers every expression that
// sits in one of the supported parent slots to transform. When transform
// returns a non-nil expression, the slot is overwritten with it; a nil
// result leaves the slot unchanged.
//
// transform must be prepared to receive any expression kind, and should
// return nil for expressions it does not want to replace. The walk
// continues into the replacement, so transform should not return an
// expression that it would itself replace again.
//
// Every replacement returned by transform is applied. In particular the
// function position of a call is now written back as well; it used to be
// computed and discarded, which let callers count a change that never
// happened.
func rewriteInExprs(file *ast.File, transform func(ast.Expr) ast.Expr) {
	// apply offers *slot to transform and stores a non-nil replacement.
	apply := func(slot *ast.Expr) {
		if *slot == nil {
			return
		}
		if repl := transform(*slot); repl != nil {
			*slot = repl
		}
	}
	// applyAll does the same for every element of a slice of expressions.
	applyAll := func(list []ast.Expr) {
		for i := range list {
			apply(&list[i])
		}
	}

	ast.Inspect(file, func(n ast.Node) bool {
		switch node := n.(type) {
		case *ast.AssignStmt:
			applyAll(node.Rhs)
		case *ast.ValueSpec:
			applyAll(node.Values)
		case *ast.ReturnStmt:
			applyAll(node.Results)
		case *ast.CallExpr:
			applyAll(node.Args)
			apply(&node.Fun)
		case *ast.SendStmt:
			apply(&node.Value)
		case *ast.ExprStmt:
			apply(&node.X)
		case *ast.KeyValueExpr:
			apply(&node.Value)
		case *ast.CompositeLit:
			applyAll(node.Elts)
		case *ast.BinaryExpr:
			apply(&node.X)
			apply(&node.Y)
		case *ast.UnaryExpr:
			apply(&node.X)
		case *ast.StarExpr:
			// *new(T)
			apply(&node.X)
		case *ast.ParenExpr:
			apply(&node.X)
		case *ast.SelectorExpr:
			// new(T).Method() — new is SelectorExpr.X
			apply(&node.X)
		case *ast.IndexExpr:
			apply(&node.X)
			apply(&node.Index)
		case *ast.SwitchStmt:
			// The init statement is reached by the walk itself; the tag
			// (which may be absent) is handled here.
			apply(&node.Tag)
		case *ast.RangeStmt:
			// for v := range make(chan T)
			apply(&node.X)
		}
		return true
	})
}
704