st1018.go raw

   1  package st1018
   2  
   3  import (
   4  	"fmt"
   5  	"go/ast"
   6  	"go/token"
   7  	"strconv"
   8  	"unicode"
   9  	"unicode/utf8"
  10  
  11  	"honnef.co/go/tools/analysis/code"
  12  	"honnef.co/go/tools/analysis/lint"
  13  	"honnef.co/go/tools/analysis/report"
  14  
  15  	"golang.org/x/tools/go/analysis"
  16  	"golang.org/x/tools/go/analysis/passes/inspect"
  17  )
  18  
  19  var SCAnalyzer = lint.InitializeAnalyzer(&lint.Analyzer{
  20  	Analyzer: &analysis.Analyzer{
  21  		Name:     "ST1018",
  22  		Run:      run,
  23  		Requires: []*analysis.Analyzer{inspect.Analyzer},
  24  	},
  25  	Doc: &lint.RawDocumentation{
  26  		Title:   `Avoid zero-width and control characters in string literals`,
  27  		Since:   "2019.2",
  28  		MergeIf: lint.MergeIfAny,
  29  	},
  30  })
  31  
  32  var Analyzer = SCAnalyzer.Analyzer
  33  
  34  func run(pass *analysis.Pass) (interface{}, error) {
  35  	fn := func(node ast.Node) {
  36  		lit := node.(*ast.BasicLit)
  37  		if lit.Kind != token.STRING {
  38  			return
  39  		}
  40  
  41  		type invalid struct {
  42  			r   rune
  43  			off int
  44  		}
  45  		var invalids []invalid
  46  		hasFormat := false
  47  		hasControl := false
  48  		prev := rune(-1)
  49  		const zwj = '\u200d'
  50  		for off, r := range lit.Value {
  51  			if unicode.Is(unicode.Cf, r) {
  52  				if r >= '\U000e0020' && r <= '\U000e007f' {
  53  					// These are used for spelling out country codes for flag emoji
  54  				} else if unicode.Is(unicode.Variation_Selector, r) {
  55  					// Always allow variation selectors
  56  				} else if r == zwj && (unicode.Is(unicode.S, prev) || unicode.Is(unicode.Variation_Selector, prev)) {
  57  					// Allow zero-width joiner in emoji, including those that use variation selectors.
  58  
  59  					// Technically some foreign scripts make valid use of zero-width joiners, too, but for now we'll err
  60  					// on the side of flagging all non-emoji uses of ZWJ.
  61  				} else {
  62  					switch r {
  63  					case '\u0600', '\u0601', '\u0602', '\u0603', '\u0604', '\u0605', '\u0890', '\u0891', '\u08e2':
  64  						// Arabic characters that are not actually invisible. If anyone knows why these are in the
  65  						// Other, Format category please let me know.
  66  					case '\u061c', '\u202A', '\u202B', '\u202D', '\u202E', '\u2066', '\u2067', '\u2068', '\u202C', '\u2069':
  67  						// Bidirectional formatting characters. At best they will render confusingly, at worst they're used
  68  						// to cause confusion.
  69  						fallthrough
  70  					default:
  71  						invalids = append(invalids, invalid{r, off})
  72  						hasFormat = true
  73  					}
  74  				}
  75  			} else if unicode.Is(unicode.Cc, r) && r != '\n' && r != '\t' && r != '\r' {
  76  				invalids = append(invalids, invalid{r, off})
  77  				hasControl = true
  78  			}
  79  			prev = r
  80  		}
  81  
  82  		switch len(invalids) {
  83  		case 0:
  84  			return
  85  		case 1:
  86  			var kind string
  87  			if hasFormat {
  88  				kind = "format"
  89  			} else if hasControl {
  90  				kind = "control"
  91  			} else {
  92  				panic("unreachable")
  93  			}
  94  
  95  			r := invalids[0]
  96  			msg := fmt.Sprintf("string literal contains the Unicode %s character %U, consider using the %q escape sequence instead", kind, r.r, r.r)
  97  
  98  			replacement := strconv.QuoteRune(r.r)
  99  			replacement = replacement[1 : len(replacement)-1]
 100  			edit := analysis.SuggestedFix{
 101  				Message: fmt.Sprintf("replace %s character %U with %q", kind, r.r, r.r),
 102  				TextEdits: []analysis.TextEdit{{
 103  					Pos:     lit.Pos() + token.Pos(r.off),
 104  					End:     lit.Pos() + token.Pos(r.off) + token.Pos(utf8.RuneLen(r.r)),
 105  					NewText: []byte(replacement),
 106  				}},
 107  			}
 108  			delete := analysis.SuggestedFix{
 109  				Message: fmt.Sprintf("delete %s character %U", kind, r.r),
 110  				TextEdits: []analysis.TextEdit{{
 111  					Pos: lit.Pos() + token.Pos(r.off),
 112  					End: lit.Pos() + token.Pos(r.off) + token.Pos(utf8.RuneLen(r.r)),
 113  				}},
 114  			}
 115  			report.Report(pass, lit, msg, report.Fixes(edit, delete))
 116  		default:
 117  			var kind string
 118  			if hasFormat && hasControl {
 119  				kind = "format and control"
 120  			} else if hasFormat {
 121  				kind = "format"
 122  			} else if hasControl {
 123  				kind = "control"
 124  			} else {
 125  				panic("unreachable")
 126  			}
 127  
 128  			msg := fmt.Sprintf("string literal contains Unicode %s characters, consider using escape sequences instead", kind)
 129  			var edits []analysis.TextEdit
 130  			var deletions []analysis.TextEdit
 131  			for _, r := range invalids {
 132  				replacement := strconv.QuoteRune(r.r)
 133  				replacement = replacement[1 : len(replacement)-1]
 134  				edits = append(edits, analysis.TextEdit{
 135  					Pos:     lit.Pos() + token.Pos(r.off),
 136  					End:     lit.Pos() + token.Pos(r.off) + token.Pos(utf8.RuneLen(r.r)),
 137  					NewText: []byte(replacement),
 138  				})
 139  				deletions = append(deletions, analysis.TextEdit{
 140  					Pos: lit.Pos() + token.Pos(r.off),
 141  					End: lit.Pos() + token.Pos(r.off) + token.Pos(utf8.RuneLen(r.r)),
 142  				})
 143  			}
 144  			edit := analysis.SuggestedFix{
 145  				Message:   fmt.Sprintf("replace all %s characters with escape sequences", kind),
 146  				TextEdits: edits,
 147  			}
 148  			delete := analysis.SuggestedFix{
 149  				Message:   fmt.Sprintf("delete all %s characters", kind),
 150  				TextEdits: deletions,
 151  			}
 152  			report.Report(pass, lit, msg, report.Fixes(edit, delete))
 153  		}
 154  	}
 155  	code.Preorder(pass, fn, (*ast.BasicLit)(nil))
 156  	return nil, nil
 157  }
 158