cond.go raw

   1  // Copyright 2015 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  package runes
   6  
   7  import (
   8  	"unicode/utf8"
   9  
  10  	"golang.org/x/text/transform"
  11  )
  12  
  13  // Note: below we pass invalid UTF-8 to the tIn and tNotIn transformers as is.
  14  // This is done for various reasons:
  15  // - To retain the semantics of the Nop transformer: if input is passed to a Nop
  16  //   one would expect it to be unchanged.
  17  // - It would be very expensive to pass a converted RuneError to a transformer:
  18  //   a transformer might need more source bytes after RuneError, meaning that
  19  //   the only way to pass it safely is to create a new buffer and manage the
  20  //   intermingling of RuneErrors and normal input.
  21  // - Many transformers leave ill-formed UTF-8 as is, so this is not
  22  //   inconsistent. Generally ill-formed UTF-8 is only replaced if it is a
  23  //   logical consequence of the operation (as for Map) or if it otherwise would
  24  //   pose security concerns (as for Remove).
  25  // - An alternative would be to return an error on ill-formed UTF-8, but this
  26  //   would be inconsistent with other operations.
  27  
  28  // If returns a transformer that applies tIn to consecutive runes for which
  29  // s.Contains(r) and tNotIn to consecutive runes for which !s.Contains(r). Reset
  30  // is called on tIn and tNotIn at the start of each run. A Nop transformer will
  31  // substitute a nil value passed to tIn or tNotIn. Invalid UTF-8 is translated
  32  // to RuneError to determine which transformer to apply, but is passed as is to
  33  // the respective transformer.
  34  func If(s Set, tIn, tNotIn transform.Transformer) Transformer {
  35  	if tIn == nil && tNotIn == nil {
  36  		return Transformer{transform.Nop}
  37  	}
  38  	if tIn == nil {
  39  		tIn = transform.Nop
  40  	}
  41  	if tNotIn == nil {
  42  		tNotIn = transform.Nop
  43  	}
  44  	sIn, ok := tIn.(transform.SpanningTransformer)
  45  	if !ok {
  46  		sIn = dummySpan{tIn}
  47  	}
  48  	sNotIn, ok := tNotIn.(transform.SpanningTransformer)
  49  	if !ok {
  50  		sNotIn = dummySpan{tNotIn}
  51  	}
  52  
  53  	a := &cond{
  54  		tIn:    sIn,
  55  		tNotIn: sNotIn,
  56  		f:      s.Contains,
  57  	}
  58  	a.Reset()
  59  	return Transformer{a}
  60  }
  61  
  62  type dummySpan struct{ transform.Transformer }
  63  
  64  func (d dummySpan) Span(src []byte, atEOF bool) (n int, err error) {
  65  	return 0, transform.ErrEndOfSpan
  66  }
  67  
  68  type cond struct {
  69  	tIn, tNotIn transform.SpanningTransformer
  70  	f           func(rune) bool
  71  	check       func(rune) bool               // current check to perform
  72  	t           transform.SpanningTransformer // current transformer to use
  73  }
  74  
  75  // Reset implements transform.Transformer.
  76  func (t *cond) Reset() {
  77  	t.check = t.is
  78  	t.t = t.tIn
  79  	t.t.Reset() // notIn will be reset on first usage.
  80  }
  81  
  82  func (t *cond) is(r rune) bool {
  83  	if t.f(r) {
  84  		return true
  85  	}
  86  	t.check = t.isNot
  87  	t.t = t.tNotIn
  88  	t.tNotIn.Reset()
  89  	return false
  90  }
  91  
  92  func (t *cond) isNot(r rune) bool {
  93  	if !t.f(r) {
  94  		return true
  95  	}
  96  	t.check = t.is
  97  	t.t = t.tIn
  98  	t.tIn.Reset()
  99  	return false
 100  }
 101  
 102  // This implementation of Span doesn't help all too much, but it needs to be
 103  // there to satisfy this package's Transformer interface.
 104  // TODO: there are certainly room for improvements, though. For example, if
 105  // t.t == transform.Nop (which will a common occurrence) it will save a bundle
 106  // to special-case that loop.
 107  func (t *cond) Span(src []byte, atEOF bool) (n int, err error) {
 108  	p := 0
 109  	for n < len(src) && err == nil {
 110  		// Don't process too much at a time as the Spanner that will be
 111  		// called on this block may terminate early.
 112  		const maxChunk = 4096
 113  		max := len(src)
 114  		if v := n + maxChunk; v < max {
 115  			max = v
 116  		}
 117  		atEnd := false
 118  		size := 0
 119  		current := t.t
 120  		for ; p < max; p += size {
 121  			r := rune(src[p])
 122  			if r < utf8.RuneSelf {
 123  				size = 1
 124  			} else if r, size = utf8.DecodeRune(src[p:]); size == 1 {
 125  				if !atEOF && !utf8.FullRune(src[p:]) {
 126  					err = transform.ErrShortSrc
 127  					break
 128  				}
 129  			}
 130  			if !t.check(r) {
 131  				// The next rune will be the start of a new run.
 132  				atEnd = true
 133  				break
 134  			}
 135  		}
 136  		n2, err2 := current.Span(src[n:p], atEnd || (atEOF && p == len(src)))
 137  		n += n2
 138  		if err2 != nil {
 139  			return n, err2
 140  		}
 141  		// At this point either err != nil or t.check will pass for the rune at p.
 142  		p = n + size
 143  	}
 144  	return n, err
 145  }
 146  
 147  func (t *cond) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
 148  	p := 0
 149  	for nSrc < len(src) && err == nil {
 150  		// Don't process too much at a time, as the work might be wasted if the
 151  		// destination buffer isn't large enough to hold the result or a
 152  		// transform returns an error early.
 153  		const maxChunk = 4096
 154  		max := len(src)
 155  		if n := nSrc + maxChunk; n < len(src) {
 156  			max = n
 157  		}
 158  		atEnd := false
 159  		size := 0
 160  		current := t.t
 161  		for ; p < max; p += size {
 162  			r := rune(src[p])
 163  			if r < utf8.RuneSelf {
 164  				size = 1
 165  			} else if r, size = utf8.DecodeRune(src[p:]); size == 1 {
 166  				if !atEOF && !utf8.FullRune(src[p:]) {
 167  					err = transform.ErrShortSrc
 168  					break
 169  				}
 170  			}
 171  			if !t.check(r) {
 172  				// The next rune will be the start of a new run.
 173  				atEnd = true
 174  				break
 175  			}
 176  		}
 177  		nDst2, nSrc2, err2 := current.Transform(dst[nDst:], src[nSrc:p], atEnd || (atEOF && p == len(src)))
 178  		nDst += nDst2
 179  		nSrc += nSrc2
 180  		if err2 != nil {
 181  			return nDst, nSrc, err2
 182  		}
 183  		// At this point either err != nil or t.check will pass for the rune at p.
 184  		p = nSrc + size
 185  	}
 186  	return nDst, nSrc, err
 187  }
 188