st1018.go raw
1 package st1018
2
3 import (
4 "fmt"
5 "go/ast"
6 "go/token"
7 "strconv"
8 "unicode"
9 "unicode/utf8"
10
11 "honnef.co/go/tools/analysis/code"
12 "honnef.co/go/tools/analysis/lint"
13 "honnef.co/go/tools/analysis/report"
14
15 "golang.org/x/tools/go/analysis"
16 "golang.org/x/tools/go/analysis/passes/inspect"
17 )
18
19 var SCAnalyzer = lint.InitializeAnalyzer(&lint.Analyzer{
20 Analyzer: &analysis.Analyzer{
21 Name: "ST1018",
22 Run: run,
23 Requires: []*analysis.Analyzer{inspect.Analyzer},
24 },
25 Doc: &lint.RawDocumentation{
26 Title: `Avoid zero-width and control characters in string literals`,
27 Since: "2019.2",
28 MergeIf: lint.MergeIfAny,
29 },
30 })
31
32 var Analyzer = SCAnalyzer.Analyzer
33
34 func run(pass *analysis.Pass) (interface{}, error) {
35 fn := func(node ast.Node) {
36 lit := node.(*ast.BasicLit)
37 if lit.Kind != token.STRING {
38 return
39 }
40
41 type invalid struct {
42 r rune
43 off int
44 }
45 var invalids []invalid
46 hasFormat := false
47 hasControl := false
48 prev := rune(-1)
49 const zwj = '\u200d'
50 for off, r := range lit.Value {
51 if unicode.Is(unicode.Cf, r) {
52 if r >= '\U000e0020' && r <= '\U000e007f' {
53 // These are used for spelling out country codes for flag emoji
54 } else if unicode.Is(unicode.Variation_Selector, r) {
55 // Always allow variation selectors
56 } else if r == zwj && (unicode.Is(unicode.S, prev) || unicode.Is(unicode.Variation_Selector, prev)) {
57 // Allow zero-width joiner in emoji, including those that use variation selectors.
58
59 // Technically some foreign scripts make valid use of zero-width joiners, too, but for now we'll err
60 // on the side of flagging all non-emoji uses of ZWJ.
61 } else {
62 switch r {
63 case '\u0600', '\u0601', '\u0602', '\u0603', '\u0604', '\u0605', '\u0890', '\u0891', '\u08e2':
64 // Arabic characters that are not actually invisible. If anyone knows why these are in the
65 // Other, Format category please let me know.
66 case '\u061c', '\u202A', '\u202B', '\u202D', '\u202E', '\u2066', '\u2067', '\u2068', '\u202C', '\u2069':
67 // Bidirectional formatting characters. At best they will render confusingly, at worst they're used
68 // to cause confusion.
69 fallthrough
70 default:
71 invalids = append(invalids, invalid{r, off})
72 hasFormat = true
73 }
74 }
75 } else if unicode.Is(unicode.Cc, r) && r != '\n' && r != '\t' && r != '\r' {
76 invalids = append(invalids, invalid{r, off})
77 hasControl = true
78 }
79 prev = r
80 }
81
82 switch len(invalids) {
83 case 0:
84 return
85 case 1:
86 var kind string
87 if hasFormat {
88 kind = "format"
89 } else if hasControl {
90 kind = "control"
91 } else {
92 panic("unreachable")
93 }
94
95 r := invalids[0]
96 msg := fmt.Sprintf("string literal contains the Unicode %s character %U, consider using the %q escape sequence instead", kind, r.r, r.r)
97
98 replacement := strconv.QuoteRune(r.r)
99 replacement = replacement[1 : len(replacement)-1]
100 edit := analysis.SuggestedFix{
101 Message: fmt.Sprintf("replace %s character %U with %q", kind, r.r, r.r),
102 TextEdits: []analysis.TextEdit{{
103 Pos: lit.Pos() + token.Pos(r.off),
104 End: lit.Pos() + token.Pos(r.off) + token.Pos(utf8.RuneLen(r.r)),
105 NewText: []byte(replacement),
106 }},
107 }
108 delete := analysis.SuggestedFix{
109 Message: fmt.Sprintf("delete %s character %U", kind, r.r),
110 TextEdits: []analysis.TextEdit{{
111 Pos: lit.Pos() + token.Pos(r.off),
112 End: lit.Pos() + token.Pos(r.off) + token.Pos(utf8.RuneLen(r.r)),
113 }},
114 }
115 report.Report(pass, lit, msg, report.Fixes(edit, delete))
116 default:
117 var kind string
118 if hasFormat && hasControl {
119 kind = "format and control"
120 } else if hasFormat {
121 kind = "format"
122 } else if hasControl {
123 kind = "control"
124 } else {
125 panic("unreachable")
126 }
127
128 msg := fmt.Sprintf("string literal contains Unicode %s characters, consider using escape sequences instead", kind)
129 var edits []analysis.TextEdit
130 var deletions []analysis.TextEdit
131 for _, r := range invalids {
132 replacement := strconv.QuoteRune(r.r)
133 replacement = replacement[1 : len(replacement)-1]
134 edits = append(edits, analysis.TextEdit{
135 Pos: lit.Pos() + token.Pos(r.off),
136 End: lit.Pos() + token.Pos(r.off) + token.Pos(utf8.RuneLen(r.r)),
137 NewText: []byte(replacement),
138 })
139 deletions = append(deletions, analysis.TextEdit{
140 Pos: lit.Pos() + token.Pos(r.off),
141 End: lit.Pos() + token.Pos(r.off) + token.Pos(utf8.RuneLen(r.r)),
142 })
143 }
144 edit := analysis.SuggestedFix{
145 Message: fmt.Sprintf("replace all %s characters with escape sequences", kind),
146 TextEdits: edits,
147 }
148 delete := analysis.SuggestedFix{
149 Message: fmt.Sprintf("delete all %s characters", kind),
150 TextEdits: deletions,
151 }
152 report.Report(pass, lit, msg, report.Fixes(edit, delete))
153 }
154 }
155 code.Preorder(pass, fn, (*ast.BasicLit)(nil))
156 return nil, nil
157 }
158