cases.go raw

   1  // Copyright 2014 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  //go:generate go run gen.go gen_trieval.go
   6  
   7  // Package cases provides general and language-specific case mappers.
   8  package cases // import "golang.org/x/text/cases"
   9  
  10  import (
  11  	"golang.org/x/text/language"
  12  	"golang.org/x/text/transform"
  13  )
  14  
// References:
// - The Unicode Standard, Sections 3.13 (Default Case Algorithms), 4.2 (Case),
//   and 5.18 (Case Mappings).
// - https://www.unicode.org/reports/tr29/
// - https://www.unicode.org/Public/6.3.0/ucd/CaseFolding.txt
// - https://www.unicode.org/Public/6.3.0/ucd/SpecialCasing.txt
// - https://www.unicode.org/Public/6.3.0/ucd/DerivedCoreProperties.txt
// - https://www.unicode.org/Public/6.3.0/ucd/auxiliary/WordBreakProperty.txt
// - https://www.unicode.org/Public/6.3.0/ucd/auxiliary/WordBreakTest.txt
// - http://userguide.icu-project.org/transforms/casemappings
  24  
  25  // TODO:
  26  // - Case folding
  27  // - Wide and Narrow?
  28  // - Segmenter option for title casing.
  29  // - ASCII fast paths
  30  // - Encode Soft-Dotted property within trie somehow.
  31  
  32  // A Caser transforms given input to a certain case. It implements
  33  // transform.Transformer.
  34  //
  35  // A Caser may be stateful and should therefore not be shared between
  36  // goroutines.
  37  type Caser struct {
  38  	t transform.SpanningTransformer
  39  }
  40  
  41  // Bytes returns a new byte slice with the result of converting b to the case
  42  // form implemented by c.
  43  func (c Caser) Bytes(b []byte) []byte {
  44  	b, _, _ = transform.Bytes(c.t, b)
  45  	return b
  46  }
  47  
  48  // String returns a string with the result of transforming s to the case form
  49  // implemented by c.
  50  func (c Caser) String(s string) string {
  51  	s, _, _ = transform.String(c.t, s)
  52  	return s
  53  }
  54  
  55  // Reset resets the Caser to be reused for new input after a previous call to
  56  // Transform.
  57  func (c Caser) Reset() { c.t.Reset() }
  58  
  59  // Transform implements the transform.Transformer interface and transforms the
  60  // given input to the case form implemented by c.
  61  func (c Caser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
  62  	return c.t.Transform(dst, src, atEOF)
  63  }
  64  
  65  // Span implements the transform.SpanningTransformer interface.
  66  func (c Caser) Span(src []byte, atEOF bool) (n int, err error) {
  67  	return c.t.Span(src, atEOF)
  68  }
  69  
  70  // Upper returns a Caser for language-specific uppercasing.
  71  func Upper(t language.Tag, opts ...Option) Caser {
  72  	return Caser{makeUpper(t, getOpts(opts...))}
  73  }
  74  
  75  // Lower returns a Caser for language-specific lowercasing.
  76  func Lower(t language.Tag, opts ...Option) Caser {
  77  	return Caser{makeLower(t, getOpts(opts...))}
  78  }
  79  
  80  // Title returns a Caser for language-specific title casing. It uses an
  81  // approximation of the default Unicode Word Break algorithm.
  82  func Title(t language.Tag, opts ...Option) Caser {
  83  	return Caser{makeTitle(t, getOpts(opts...))}
  84  }
  85  
  86  // Fold returns a Caser that implements Unicode case folding. The returned Caser
  87  // is stateless and safe to use concurrently by multiple goroutines.
  88  //
  89  // Case folding does not normalize the input and may not preserve a normal form.
  90  // Use the collate or search package for more convenient and linguistically
  91  // sound comparisons. Use golang.org/x/text/secure/precis for string comparisons
  92  // where security aspects are a concern.
  93  func Fold(opts ...Option) Caser {
  94  	return Caser{makeFold(getOpts(opts...))}
  95  }
  96  
  97  // An Option is used to modify the behavior of a Caser.
  98  type Option func(o options) options
  99  
// TODO: consider making these options take a boolean as well, like
// HandleFinalSigma. The advantage of that approach is that other providers of
// a lower-case algorithm could set different defaults by prefixing a
// user-provided slice of options with their own. This is handy, for instance,
// for the precis package, which would override the default to not handle the
// Greek final sigma.
 105  
 106  var (
 107  	// NoLower disables the lowercasing of non-leading letters for a title
 108  	// caser.
 109  	NoLower Option = noLower
 110  
 111  	// Compact omits mappings in case folding for characters that would grow the
 112  	// input. (Unimplemented.)
 113  	Compact Option = compact
 114  )
 115  
 116  // TODO: option to preserve a normal form, if applicable?
 117  
 118  type options struct {
 119  	noLower bool
 120  	simple  bool
 121  
 122  	// TODO: segmenter, max ignorable, alternative versions, etc.
 123  
 124  	ignoreFinalSigma bool
 125  }
 126  
 127  func getOpts(o ...Option) (res options) {
 128  	for _, f := range o {
 129  		res = f(res)
 130  	}
 131  	return
 132  }
 133  
 134  func noLower(o options) options {
 135  	o.noLower = true
 136  	return o
 137  }
 138  
 139  func compact(o options) options {
 140  	o.simple = true
 141  	return o
 142  }
 143  
 144  // HandleFinalSigma specifies whether the special handling of Greek final sigma
 145  // should be enabled. Unicode prescribes handling the Greek final sigma for all
 146  // locales, but standards like IDNA and PRECIS override this default.
 147  func HandleFinalSigma(enable bool) Option {
 148  	if enable {
 149  		return handleFinalSigma
 150  	}
 151  	return ignoreFinalSigma
 152  }
 153  
 154  func ignoreFinalSigma(o options) options {
 155  	o.ignoreFinalSigma = true
 156  	return o
 157  }
 158  
 159  func handleFinalSigma(o options) options {
 160  	o.ignoreFinalSigma = false
 161  	return o
 162  }
 163