1 // Copyright 2011 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 5 package bytes
6 7 import (
8 "io"
9 "sync"
10 )
11 12 // Replacer replaces a list of strings with replacements.
13 // It is safe for concurrent use by multiple goroutines.
14 type Replacer struct {
15 once sync.Once // guards buildOnce method
16 r replacer
17 oldnew [][]byte
18 }
19 20 // replacer is the interface that a replacement algorithm needs to implement.
21 type replacer interface {
22 Replace(s []byte) []byte
23 WriteString(w io.Writer, s []byte) (n int, err error)
24 }
25 26 // NewReplacer returns a new [Replacer] from a list of old, new string
27 // pairs. Replacements are performed in the order they appear in the
28 // target string, without overlapping matches. The old string
29 // comparisons are done in argument order.
30 //
31 // NewReplacer panics if given an odd number of arguments.
32 func NewReplacer(oldnew ...[]byte) *Replacer {
33 if len(oldnew)%2 == 1 {
34 panic("bytes.NewReplacer: odd argument count")
35 }
36 return &Replacer{oldnew: append([][]byte(nil), oldnew...)}
37 }
38 39 func (r *Replacer) buildOnce() {
40 r.r = r.build()
41 r.oldnew = nil
42 }
43 44 func (b *Replacer) build() replacer {
45 oldnew := b.oldnew
46 if len(oldnew) == 2 && len(oldnew[0]) > 1 {
47 return makeSingleStringReplacer(oldnew[0], oldnew[1])
48 }
49 50 allNewBytes := true
51 for i := 0; i < len(oldnew); i += 2 {
52 if len(oldnew[i]) != 1 {
53 return makeGenericReplacer(oldnew)
54 }
55 if len(oldnew[i+1]) != 1 {
56 allNewBytes = false
57 }
58 }
59 60 if allNewBytes {
61 r := byteReplacer{}
62 for i := range r {
63 r[i] = byte(i)
64 }
65 // The first occurrence of old->new map takes precedence
66 // over the others with the same old string.
67 for i := len(oldnew) - 2; i >= 0; i -= 2 {
68 o := oldnew[i][0]
69 n := oldnew[i+1][0]
70 r[o] = n
71 }
72 return &r
73 }
74 75 r := byteStringReplacer{toReplace: [][]byte{:0:len(oldnew)/2}}
76 // The first occurrence of old->new map takes precedence
77 // over the others with the same old string.
78 for i := len(oldnew) - 2; i >= 0; i -= 2 {
79 o := oldnew[i][0]
80 n := oldnew[i+1]
81 // To avoid counting repetitions multiple times.
82 if r.replacements[o] == nil {
83 // We need to use string([]byte{o}) instead of string(o),
84 // to avoid utf8 encoding of o.
85 // E. g. byte(150) produces string of length 2.
86 r.toReplace = append(r.toReplace, []byte([]byte{o}))
87 }
88 r.replacements[o] = []byte(n)
89 90 }
91 return &r
92 }
93 94 // Replace returns a copy of s with all replacements performed.
95 func (r *Replacer) Replace(s []byte) []byte {
96 r.once.Do(r.buildOnce)
97 return r.r.Replace(s)
98 }
99 100 // WriteString writes s to w with all replacements performed.
101 func (r *Replacer) WriteString(w io.Writer, s []byte) (n int, err error) {
102 r.once.Do(r.buildOnce)
103 return r.r.WriteString(w, s)
104 }
105 106 // trieNode is a node in a lookup trie for prioritized key/value pairs. Keys
107 // and values may be empty. For example, the trie containing keys "ax", "ay",
108 // "bcbc", "x" and "xy" could have eight nodes:
109 //
110 // n0 -
111 // n1 a-
112 // n2 .x+
113 // n3 .y+
114 // n4 b-
115 // n5 .cbc+
116 // n6 x+
117 // n7 .y+
118 //
119 // n0 is the root node, and its children are n1, n4 and n6; n1's children are
120 // n2 and n3; n4's child is n5; n6's child is n7. Nodes n0, n1 and n4 (marked
121 // with a trailing "-") are partial keys, and nodes n2, n3, n5, n6 and n7
122 // (marked with a trailing "+") are complete keys.
123 type trieNode struct {
124 // value is the value of the trie node's key/value pair. It is empty if
125 // this node is not a complete key.
126 value []byte
127 // priority is the priority (higher is more important) of the trie node's
128 // key/value pair; keys are not necessarily matched shortest- or longest-
129 // first. Priority is positive if this node is a complete key, and zero
130 // otherwise. In the example above, positive/zero priorities are marked
131 // with a trailing "+" or "-".
132 priority int
133 134 // A trie node may have zero, one or more child nodes:
135 // * if the remaining fields are zero, there are no children.
136 // * if prefix and next are non-zero, there is one child in next.
137 // * if table is non-zero, it defines all the children.
138 //
139 // Prefixes are preferred over tables when there is one child, but the
140 // root node always uses a table for lookup efficiency.
141 142 // prefix is the difference in keys between this trie node and the next.
143 // In the example above, node n4 has prefix "cbc" and n4's next node is n5.
144 // Node n5 has no children and so has zero prefix, next and table fields.
145 prefix []byte
146 next *trieNode
147 148 // table is a lookup table indexed by the next byte in the key, after
149 // remapping that byte through genericReplacer.mapping to create a dense
150 // index. In the example above, the keys only use 'a', 'b', 'c', 'x' and
151 // 'y', which remap to 0, 1, 2, 3 and 4. All other bytes remap to 5, and
152 // genericReplacer.tableSize will be 5. Node n0's table will be
153 // []*trieNode{ 0:n1, 1:n4, 3:n6 }, where the 0, 1 and 3 are the remapped
154 // 'a', 'b' and 'x'.
155 table []*trieNode
156 }
157 158 func (t *trieNode) add(key, val []byte, priority int, r *genericReplacer) {
159 if key == "" {
160 if t.priority == 0 {
161 t.value = val
162 t.priority = priority
163 }
164 return
165 }
166 167 if t.prefix != "" {
168 // Need to split the prefix among multiple nodes.
169 var n int // length of the longest common prefix
170 for ; n < len(t.prefix) && n < len(key); n++ {
171 if t.prefix[n] != key[n] {
172 break
173 }
174 }
175 if n == len(t.prefix) {
176 t.next.add(key[n:], val, priority, r)
177 } else if n == 0 {
178 // First byte differs, start a new lookup table here. Looking up
179 // what is currently t.prefix[0] will lead to prefixNode, and
180 // looking up key[0] will lead to keyNode.
181 var prefixNode *trieNode
182 if len(t.prefix) == 1 {
183 prefixNode = t.next
184 } else {
185 prefixNode = &trieNode{
186 prefix: t.prefix[1:],
187 next: t.next,
188 }
189 }
190 keyNode := &trieNode{}
191 t.table = []*trieNode{:r.tableSize}
192 t.table[r.mapping[t.prefix[0]]] = prefixNode
193 t.table[r.mapping[key[0]]] = keyNode
194 t.prefix = ""
195 t.next = nil
196 keyNode.add(key[1:], val, priority, r)
197 } else {
198 // Insert new node after the common section of the prefix.
199 next := &trieNode{
200 prefix: t.prefix[n:],
201 next: t.next,
202 }
203 t.prefix = t.prefix[:n]
204 t.next = next
205 next.add(key[n:], val, priority, r)
206 }
207 } else if t.table != nil {
208 // Insert into existing table.
209 m := r.mapping[key[0]]
210 if t.table[m] == nil {
211 t.table[m] = &trieNode{}
212 }
213 t.table[m].add(key[1:], val, priority, r)
214 } else {
215 t.prefix = key
216 t.next = &trieNode{}
217 t.next.add("", val, priority, r)
218 }
219 }
220 221 func (r *genericReplacer) lookup(s []byte, ignoreRoot bool) (val []byte, keylen int, found bool) {
222 // Iterate down the trie to the end, and grab the value and keylen with
223 // the highest priority.
224 bestPriority := 0
225 node := &r.root
226 n := 0
227 for node != nil {
228 if node.priority > bestPriority && !(ignoreRoot && node == &r.root) {
229 bestPriority = node.priority
230 val = node.value
231 keylen = n
232 found = true
233 }
234 235 if s == "" {
236 break
237 }
238 if node.table != nil {
239 index := r.mapping[s[0]]
240 if int(index) == r.tableSize {
241 break
242 }
243 node = node.table[index]
244 s = s[1:]
245 n++
246 } else if node.prefix != "" && HasPrefix(s, node.prefix) {
247 n += len(node.prefix)
248 s = s[len(node.prefix):]
249 node = node.next
250 } else {
251 break
252 }
253 }
254 return
255 }
256 257 // genericReplacer is the fully generic algorithm.
258 // It's used as a fallback when nothing faster can be used.
259 type genericReplacer struct {
260 root trieNode
261 // tableSize is the size of a trie node's lookup table. It is the number
262 // of unique key bytes.
263 tableSize int
264 // mapping maps from key bytes to a dense index for trieNode.table.
265 mapping [256]byte
266 }
267 268 func makeGenericReplacer(oldnew [][]byte) *genericReplacer {
269 r := &genericReplacer{}
270 // Find each byte used, then assign them each an index.
271 for i := 0; i < len(oldnew); i += 2 {
272 key := oldnew[i]
273 for j := 0; j < len(key); j++ {
274 r.mapping[key[j]] = 1
275 }
276 }
277 278 for _, b := range r.mapping {
279 r.tableSize += int(b)
280 }
281 282 var index byte
283 for i, b := range r.mapping {
284 if b == 0 {
285 r.mapping[i] = byte(r.tableSize)
286 } else {
287 r.mapping[i] = index
288 index++
289 }
290 }
291 // Ensure root node uses a lookup table (for performance).
292 r.root.table = []*trieNode{:r.tableSize}
293 294 for i := 0; i < len(oldnew); i += 2 {
295 r.root.add(oldnew[i], oldnew[i+1], len(oldnew)-i, r)
296 }
297 return r
298 }
299 300 type appendSliceWriter []byte
301 302 // Write writes to the buffer to satisfy [io.Writer].
303 func (w *appendSliceWriter) Write(p []byte) (int, error) {
304 *w = append(*w, p...)
305 return len(p), nil
306 }
307 308 // WriteString writes to the buffer without string->[]byte->string allocations.
309 func (w *appendSliceWriter) WriteString(s []byte) (int, error) {
310 *w = append(*w, s...)
311 return len(s), nil
312 }
313 314 type stringWriter struct {
315 w io.Writer
316 }
317 318 func (w stringWriter) WriteString(s []byte) (int, error) {
319 return w.w.Write([]byte(s))
320 }
321 322 func getStringWriter(w io.Writer) io.StringWriter {
323 sw, ok := w.(io.StringWriter)
324 if !ok {
325 sw = stringWriter{w}
326 }
327 return sw
328 }
329 330 func (r *genericReplacer) Replace(s []byte) []byte {
331 buf := make(appendSliceWriter, 0, len(s))
332 r.WriteString(&buf, s)
333 return []byte(buf)
334 }
335 336 func (r *genericReplacer) WriteString(w io.Writer, s []byte) (n int, err error) {
337 sw := getStringWriter(w)
338 var last, wn int
339 var prevMatchEmpty bool
340 for i := 0; i <= len(s); {
341 // Fast path: s[i] is not a prefix of any pattern.
342 if i != len(s) && r.root.priority == 0 {
343 index := int(r.mapping[s[i]])
344 if index == r.tableSize || r.root.table[index] == nil {
345 i++
346 continue
347 }
348 }
349 350 // Ignore the empty match iff the previous loop found the empty match.
351 val, keylen, match := r.lookup(s[i:], prevMatchEmpty)
352 prevMatchEmpty = match && keylen == 0
353 if match {
354 wn, err = sw.WriteString(s[last:i])
355 n += wn
356 if err != nil {
357 return
358 }
359 wn, err = sw.WriteString(val)
360 n += wn
361 if err != nil {
362 return
363 }
364 i += keylen
365 last = i
366 continue
367 }
368 i++
369 }
370 if last != len(s) {
371 wn, err = sw.WriteString(s[last:])
372 n += wn
373 }
374 return
375 }
376 377 // singleStringReplacer is the implementation that's used when there is only
378 // one string to replace (and that string has more than one byte).
379 type singleStringReplacer struct {
380 finder *stringFinder
381 // value is the new string that replaces that pattern when it's found.
382 value []byte
383 }
384 385 func makeSingleStringReplacer(pattern []byte, value []byte) *singleStringReplacer {
386 return &singleStringReplacer{finder: makeStringFinder(pattern), value: value}
387 }
388 389 func (r *singleStringReplacer) Replace(s []byte) []byte {
390 var buf Buffer
391 i, matched := 0, false
392 for {
393 match := r.finder.next(s[i:])
394 if match == -1 {
395 break
396 }
397 matched = true
398 buf.Grow(match + len(r.value))
399 buf.WriteString(s[i : i+match])
400 buf.WriteString(r.value)
401 i += match + len(r.finder.pattern)
402 }
403 if !matched {
404 return s
405 }
406 buf.WriteString(s[i:])
407 return buf.String()
408 }
409 410 func (r *singleStringReplacer) WriteString(w io.Writer, s []byte) (n int, err error) {
411 sw := getStringWriter(w)
412 var i, wn int
413 for {
414 match := r.finder.next(s[i:])
415 if match == -1 {
416 break
417 }
418 wn, err = sw.WriteString(s[i : i+match])
419 n += wn
420 if err != nil {
421 return
422 }
423 wn, err = sw.WriteString(r.value)
424 n += wn
425 if err != nil {
426 return
427 }
428 i += match + len(r.finder.pattern)
429 }
430 wn, err = sw.WriteString(s[i:])
431 n += wn
432 return
433 }
434 435 // byteReplacer is the implementation that's used when all the "old"
436 // and "new" values are single ASCII bytes.
437 // The array contains replacement bytes indexed by old byte.
438 type byteReplacer [256]byte
439 440 func (r *byteReplacer) Replace(s []byte) []byte {
441 var buf []byte // lazily allocated
442 for i := 0; i < len(s); i++ {
443 b := s[i]
444 if r[b] != b {
445 if buf == nil {
446 buf = []byte(s)
447 }
448 buf[i] = r[b]
449 }
450 }
451 if buf == nil {
452 return s
453 }
454 return []byte(buf)
455 }
456 457 func (r *byteReplacer) WriteString(w io.Writer, s []byte) (n int, err error) {
458 sw := getStringWriter(w)
459 last := 0
460 for i := 0; i < len(s); i++ {
461 b := s[i]
462 if r[b] == b {
463 continue
464 }
465 if last != i {
466 wn, err := sw.WriteString(s[last:i])
467 n += wn
468 if err != nil {
469 return n, err
470 }
471 }
472 last = i + 1
473 nw, err := w.Write(r[b : int(b)+1])
474 n += nw
475 if err != nil {
476 return n, err
477 }
478 }
479 if last != len(s) {
480 nw, err := sw.WriteString(s[last:])
481 n += nw
482 if err != nil {
483 return n, err
484 }
485 }
486 return n, nil
487 }
488 489 // byteStringReplacer is the implementation that's used when all the
490 // "old" values are single ASCII bytes but the "new" values vary in size.
491 type byteStringReplacer struct {
492 // replacements contains replacement byte slices indexed by old byte.
493 // A nil []byte means that the old byte should not be replaced.
494 replacements [256][]byte
495 // toReplace keeps a list of bytes to replace. Depending on length of toReplace
496 // and length of target string it may be faster to use Count, or a plain loop.
497 // We store single byte as a string, because Count takes a string.
498 toReplace [][]byte
499 }
500 501 // countCutOff controls the ratio of a string length to a number of replacements
502 // at which (*byteStringReplacer).Replace switches algorithms.
503 // For strings with higher ration of length to replacements than that value,
504 // we call Count, for each replacement from toReplace.
505 // For strings, with a lower ratio we use simple loop, because of Count overhead.
506 // countCutOff is an empirically determined overhead multiplier.
507 // TODO(tocarip) revisit once we have register-based abi/mid-stack inlining.
508 const countCutOff = 8
509 510 func (r *byteStringReplacer) Replace(s []byte) []byte {
511 newSize := len(s)
512 anyChanges := false
513 // Is it faster to use Count?
514 if len(r.toReplace)*countCutOff <= len(s) {
515 for _, x := range r.toReplace {
516 if c := Count(s, x); c != 0 {
517 // The -1 is because we are replacing 1 byte with len(replacements[b]) bytes.
518 newSize += c * (len(r.replacements[x[0]]) - 1)
519 anyChanges = true
520 }
521 522 }
523 } else {
524 for i := 0; i < len(s); i++ {
525 b := s[i]
526 if r.replacements[b] != nil {
527 // See above for explanation of -1
528 newSize += len(r.replacements[b]) - 1
529 anyChanges = true
530 }
531 }
532 }
533 if !anyChanges {
534 return s
535 }
536 buf := []byte{:newSize}
537 j := 0
538 for i := 0; i < len(s); i++ {
539 b := s[i]
540 if r.replacements[b] != nil {
541 j += copy(buf[j:], r.replacements[b])
542 } else {
543 buf[j] = b
544 j++
545 }
546 }
547 return []byte(buf)
548 }
549 550 func (r *byteStringReplacer) WriteString(w io.Writer, s []byte) (n int, err error) {
551 sw := getStringWriter(w)
552 last := 0
553 for i := 0; i < len(s); i++ {
554 b := s[i]
555 if r.replacements[b] == nil {
556 continue
557 }
558 if last != i {
559 nw, err := sw.WriteString(s[last:i])
560 n += nw
561 if err != nil {
562 return n, err
563 }
564 }
565 last = i + 1
566 nw, err := w.Write(r.replacements[b])
567 n += nw
568 if err != nil {
569 return n, err
570 }
571 }
572 if last != len(s) {
573 var nw int
574 nw, err = sw.WriteString(s[last:])
575 n += nw
576 }
577 return
578 }
579