context.go raw

   1  // Copyright 2014 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  package cases
   6  
   7  import "golang.org/x/text/transform"
   8  
   9  // A context is used for iterating over source bytes, fetching case info and
  10  // writing to a destination buffer.
  11  //
  12  // Casing operations may need more than one rune of context to decide how a rune
  13  // should be cased. Casing implementations should call checkpoint on context
  14  // whenever it is known to be safe to return the runes processed so far.
  15  //
  16  // It is recommended for implementations to not allow for more than 30 case
  17  // ignorables as lookahead (analogous to the limit in norm) and to use state if
  18  // unbounded lookahead is needed for cased runes.
  19  type context struct {
  20  	dst, src []byte
  21  	atEOF    bool
  22  
  23  	pDst int // pDst points past the last written rune in dst.
  24  	pSrc int // pSrc points to the start of the currently scanned rune.
  25  
  26  	// checkpoints safe to return in Transform, where nDst <= pDst and nSrc <= pSrc.
  27  	nDst, nSrc int
  28  	err        error
  29  
  30  	sz   int  // size of current rune
  31  	info info // case information of currently scanned rune
  32  
  33  	// State preserved across calls to Transform.
  34  	isMidWord bool // false if next cased letter needs to be title-cased.
  35  }
  36  
  37  func (c *context) Reset() {
  38  	c.isMidWord = false
  39  }
  40  
  41  // ret returns the return values for the Transform method. It checks whether
  42  // there were insufficient bytes in src to complete and introduces an error
  43  // accordingly, if necessary.
  44  func (c *context) ret() (nDst, nSrc int, err error) {
  45  	if c.err != nil || c.nSrc == len(c.src) {
  46  		return c.nDst, c.nSrc, c.err
  47  	}
  48  	// This point is only reached by mappers if there was no short destination
  49  	// buffer. This means that the source buffer was exhausted and that c.sz was
  50  	// set to 0 by next.
  51  	if c.atEOF && c.pSrc == len(c.src) {
  52  		return c.pDst, c.pSrc, nil
  53  	}
  54  	return c.nDst, c.nSrc, transform.ErrShortSrc
  55  }
  56  
  57  // retSpan returns the return values for the Span method. It checks whether
  58  // there were insufficient bytes in src to complete and introduces an error
  59  // accordingly, if necessary.
  60  func (c *context) retSpan() (n int, err error) {
  61  	_, nSrc, err := c.ret()
  62  	return nSrc, err
  63  }
  64  
  65  // checkpoint sets the return value buffer points for Transform to the current
  66  // positions.
  67  func (c *context) checkpoint() {
  68  	if c.err == nil {
  69  		c.nDst, c.nSrc = c.pDst, c.pSrc+c.sz
  70  	}
  71  }
  72  
  73  // unreadRune causes the last rune read by next to be reread on the next
  74  // invocation of next. Only one unreadRune may be called after a call to next.
  75  func (c *context) unreadRune() {
  76  	c.sz = 0
  77  }
  78  
  79  func (c *context) next() bool {
  80  	c.pSrc += c.sz
  81  	if c.pSrc == len(c.src) || c.err != nil {
  82  		c.info, c.sz = 0, 0
  83  		return false
  84  	}
  85  	v, sz := trie.lookup(c.src[c.pSrc:])
  86  	c.info, c.sz = info(v), sz
  87  	if c.sz == 0 {
  88  		if c.atEOF {
  89  			// A zero size means we have an incomplete rune. If we are atEOF,
  90  			// this means it is an illegal rune, which we will consume one
  91  			// byte at a time.
  92  			c.sz = 1
  93  		} else {
  94  			c.err = transform.ErrShortSrc
  95  			return false
  96  		}
  97  	}
  98  	return true
  99  }
 100  
 101  // writeBytes adds bytes to dst.
 102  func (c *context) writeBytes(b []byte) bool {
 103  	if len(c.dst)-c.pDst < len(b) {
 104  		c.err = transform.ErrShortDst
 105  		return false
 106  	}
 107  	// This loop is faster than using copy.
 108  	for _, ch := range b {
 109  		c.dst[c.pDst] = ch
 110  		c.pDst++
 111  	}
 112  	return true
 113  }
 114  
 115  // writeString writes the given string to dst.
 116  func (c *context) writeString(s string) bool {
 117  	if len(c.dst)-c.pDst < len(s) {
 118  		c.err = transform.ErrShortDst
 119  		return false
 120  	}
 121  	// This loop is faster than using copy.
 122  	for i := 0; i < len(s); i++ {
 123  		c.dst[c.pDst] = s[i]
 124  		c.pDst++
 125  	}
 126  	return true
 127  }
 128  
 129  // copy writes the current rune to dst.
 130  func (c *context) copy() bool {
 131  	return c.writeBytes(c.src[c.pSrc : c.pSrc+c.sz])
 132  }
 133  
 134  // copyXOR copies the current rune to dst and modifies it by applying the XOR
 135  // pattern of the case info. It is the responsibility of the caller to ensure
 136  // that this is a rune with a XOR pattern defined.
 137  func (c *context) copyXOR() bool {
 138  	if !c.copy() {
 139  		return false
 140  	}
 141  	if c.info&xorIndexBit == 0 {
 142  		// Fast path for 6-bit XOR pattern, which covers most cases.
 143  		c.dst[c.pDst-1] ^= byte(c.info >> xorShift)
 144  	} else {
 145  		// Interpret XOR bits as an index.
 146  		// TODO: test performance for unrolling this loop. Verify that we have
 147  		// at least two bytes and at most three.
 148  		idx := c.info >> xorShift
 149  		for p := c.pDst - 1; ; p-- {
 150  			c.dst[p] ^= xorData[idx]
 151  			idx--
 152  			if xorData[idx] == 0 {
 153  				break
 154  			}
 155  		}
 156  	}
 157  	return true
 158  }
 159  
 160  // hasPrefix returns true if src[pSrc:] starts with the given string.
 161  func (c *context) hasPrefix(s string) bool {
 162  	b := c.src[c.pSrc:]
 163  	if len(b) < len(s) {
 164  		return false
 165  	}
 166  	for i, c := range b[:len(s)] {
 167  		if c != s[i] {
 168  			return false
 169  		}
 170  	}
 171  	return true
 172  }
 173  
 174  // caseType returns an info with only the case bits, normalized to either
 175  // cLower, cUpper, cTitle or cUncased.
 176  func (c *context) caseType() info {
 177  	cm := c.info & 0x7
 178  	if cm < 4 {
 179  		return cm
 180  	}
 181  	if cm >= cXORCase {
 182  		// xor the last bit of the rune with the case type bits.
 183  		b := c.src[c.pSrc+c.sz-1]
 184  		return info(b&1) ^ cm&0x3
 185  	}
 186  	if cm == cIgnorableCased {
 187  		return cLower
 188  	}
 189  	return cUncased
 190  }
 191  
 192  // lower writes the lowercase version of the current rune to dst.
 193  func lower(c *context) bool {
 194  	ct := c.caseType()
 195  	if c.info&hasMappingMask == 0 || ct == cLower {
 196  		return c.copy()
 197  	}
 198  	if c.info&exceptionBit == 0 {
 199  		return c.copyXOR()
 200  	}
 201  	e := exceptions[c.info>>exceptionShift:]
 202  	offset := 2 + e[0]&lengthMask // size of header + fold string
 203  	if nLower := (e[1] >> lengthBits) & lengthMask; nLower != noChange {
 204  		return c.writeString(e[offset : offset+nLower])
 205  	}
 206  	return c.copy()
 207  }
 208  
 209  func isLower(c *context) bool {
 210  	ct := c.caseType()
 211  	if c.info&hasMappingMask == 0 || ct == cLower {
 212  		return true
 213  	}
 214  	if c.info&exceptionBit == 0 {
 215  		c.err = transform.ErrEndOfSpan
 216  		return false
 217  	}
 218  	e := exceptions[c.info>>exceptionShift:]
 219  	if nLower := (e[1] >> lengthBits) & lengthMask; nLower != noChange {
 220  		c.err = transform.ErrEndOfSpan
 221  		return false
 222  	}
 223  	return true
 224  }
 225  
 226  // upper writes the uppercase version of the current rune to dst.
 227  func upper(c *context) bool {
 228  	ct := c.caseType()
 229  	if c.info&hasMappingMask == 0 || ct == cUpper {
 230  		return c.copy()
 231  	}
 232  	if c.info&exceptionBit == 0 {
 233  		return c.copyXOR()
 234  	}
 235  	e := exceptions[c.info>>exceptionShift:]
 236  	offset := 2 + e[0]&lengthMask // size of header + fold string
 237  	// Get length of first special case mapping.
 238  	n := (e[1] >> lengthBits) & lengthMask
 239  	if ct == cTitle {
 240  		// The first special case mapping is for lower. Set n to the second.
 241  		if n == noChange {
 242  			n = 0
 243  		}
 244  		n, e = e[1]&lengthMask, e[n:]
 245  	}
 246  	if n != noChange {
 247  		return c.writeString(e[offset : offset+n])
 248  	}
 249  	return c.copy()
 250  }
 251  
 252  // isUpper writes the isUppercase version of the current rune to dst.
 253  func isUpper(c *context) bool {
 254  	ct := c.caseType()
 255  	if c.info&hasMappingMask == 0 || ct == cUpper {
 256  		return true
 257  	}
 258  	if c.info&exceptionBit == 0 {
 259  		c.err = transform.ErrEndOfSpan
 260  		return false
 261  	}
 262  	e := exceptions[c.info>>exceptionShift:]
 263  	// Get length of first special case mapping.
 264  	n := (e[1] >> lengthBits) & lengthMask
 265  	if ct == cTitle {
 266  		n = e[1] & lengthMask
 267  	}
 268  	if n != noChange {
 269  		c.err = transform.ErrEndOfSpan
 270  		return false
 271  	}
 272  	return true
 273  }
 274  
 275  // title writes the title case version of the current rune to dst.
 276  func title(c *context) bool {
 277  	ct := c.caseType()
 278  	if c.info&hasMappingMask == 0 || ct == cTitle {
 279  		return c.copy()
 280  	}
 281  	if c.info&exceptionBit == 0 {
 282  		if ct == cLower {
 283  			return c.copyXOR()
 284  		}
 285  		return c.copy()
 286  	}
 287  	// Get the exception data.
 288  	e := exceptions[c.info>>exceptionShift:]
 289  	offset := 2 + e[0]&lengthMask // size of header + fold string
 290  
 291  	nFirst := (e[1] >> lengthBits) & lengthMask
 292  	if nTitle := e[1] & lengthMask; nTitle != noChange {
 293  		if nFirst != noChange {
 294  			e = e[nFirst:]
 295  		}
 296  		return c.writeString(e[offset : offset+nTitle])
 297  	}
 298  	if ct == cLower && nFirst != noChange {
 299  		// Use the uppercase version instead.
 300  		return c.writeString(e[offset : offset+nFirst])
 301  	}
 302  	// Already in correct case.
 303  	return c.copy()
 304  }
 305  
 306  // isTitle reports whether the current rune is in title case.
 307  func isTitle(c *context) bool {
 308  	ct := c.caseType()
 309  	if c.info&hasMappingMask == 0 || ct == cTitle {
 310  		return true
 311  	}
 312  	if c.info&exceptionBit == 0 {
 313  		if ct == cLower {
 314  			c.err = transform.ErrEndOfSpan
 315  			return false
 316  		}
 317  		return true
 318  	}
 319  	// Get the exception data.
 320  	e := exceptions[c.info>>exceptionShift:]
 321  	if nTitle := e[1] & lengthMask; nTitle != noChange {
 322  		c.err = transform.ErrEndOfSpan
 323  		return false
 324  	}
 325  	nFirst := (e[1] >> lengthBits) & lengthMask
 326  	if ct == cLower && nFirst != noChange {
 327  		c.err = transform.ErrEndOfSpan
 328  		return false
 329  	}
 330  	return true
 331  }
 332  
 333  // foldFull writes the foldFull version of the current rune to dst.
 334  func foldFull(c *context) bool {
 335  	if c.info&hasMappingMask == 0 {
 336  		return c.copy()
 337  	}
 338  	ct := c.caseType()
 339  	if c.info&exceptionBit == 0 {
 340  		if ct != cLower || c.info&inverseFoldBit != 0 {
 341  			return c.copyXOR()
 342  		}
 343  		return c.copy()
 344  	}
 345  	e := exceptions[c.info>>exceptionShift:]
 346  	n := e[0] & lengthMask
 347  	if n == 0 {
 348  		if ct == cLower {
 349  			return c.copy()
 350  		}
 351  		n = (e[1] >> lengthBits) & lengthMask
 352  	}
 353  	return c.writeString(e[2 : 2+n])
 354  }
 355  
 356  // isFoldFull reports whether the current run is mapped to foldFull
 357  func isFoldFull(c *context) bool {
 358  	if c.info&hasMappingMask == 0 {
 359  		return true
 360  	}
 361  	ct := c.caseType()
 362  	if c.info&exceptionBit == 0 {
 363  		if ct != cLower || c.info&inverseFoldBit != 0 {
 364  			c.err = transform.ErrEndOfSpan
 365  			return false
 366  		}
 367  		return true
 368  	}
 369  	e := exceptions[c.info>>exceptionShift:]
 370  	n := e[0] & lengthMask
 371  	if n == 0 && ct == cLower {
 372  		return true
 373  	}
 374  	c.err = transform.ErrEndOfSpan
 375  	return false
 376  }
 377