strings.go raw

   1  // Copyright 2019 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  // Package strs provides string manipulation functionality specific to protobuf.
   6  package strs
   7  
   8  import (
   9  	"go/token"
  10  	"strings"
  11  	"unicode"
  12  	"unicode/utf8"
  13  
  14  	"google.golang.org/protobuf/internal/flags"
  15  	"google.golang.org/protobuf/reflect/protoreflect"
  16  )
  17  
  18  // EnforceUTF8 reports whether to enforce strict UTF-8 validation.
  19  func EnforceUTF8(fd protoreflect.FieldDescriptor) bool {
  20  	if flags.ProtoLegacy || fd.Syntax() == protoreflect.Editions {
  21  		if fd, ok := fd.(interface{ EnforceUTF8() bool }); ok {
  22  			return fd.EnforceUTF8()
  23  		}
  24  	}
  25  	return fd.Syntax() == protoreflect.Proto3
  26  }
  27  
  28  // GoCamelCase camel-cases a protobuf name for use as a Go identifier.
  29  //
  30  // If there is an interior underscore followed by a lower case letter,
  31  // drop the underscore and convert the letter to upper case.
  32  func GoCamelCase(s string) string {
  33  	// Invariant: if the next letter is lower case, it must be converted
  34  	// to upper case.
  35  	// That is, we process a word at a time, where words are marked by _ or
  36  	// upper case letter. Digits are treated as words.
  37  	var b []byte
  38  	for i := 0; i < len(s); i++ {
  39  		c := s[i]
  40  		switch {
  41  		case c == '.' && i+1 < len(s) && isASCIILower(s[i+1]):
  42  			// Skip over '.' in ".{{lowercase}}".
  43  		case c == '.':
  44  			b = append(b, '_') // convert '.' to '_'
  45  		case c == '_' && (i == 0 || s[i-1] == '.'):
  46  			// Convert initial '_' to ensure we start with a capital letter.
  47  			// Do the same for '_' after '.' to match historic behavior.
  48  			b = append(b, 'X') // convert '_' to 'X'
  49  		case c == '_' && i+1 < len(s) && isASCIILower(s[i+1]):
  50  			// Skip over '_' in "_{{lowercase}}".
  51  		case isASCIIDigit(c):
  52  			b = append(b, c)
  53  		default:
  54  			// Assume we have a letter now - if not, it's a bogus identifier.
  55  			// The next word is a sequence of characters that must start upper case.
  56  			if isASCIILower(c) {
  57  				c -= 'a' - 'A' // convert lowercase to uppercase
  58  			}
  59  			b = append(b, c)
  60  
  61  			// Accept lower case sequence that follows.
  62  			for ; i+1 < len(s) && isASCIILower(s[i+1]); i++ {
  63  				b = append(b, s[i+1])
  64  			}
  65  		}
  66  	}
  67  	return string(b)
  68  }
  69  
  70  // GoSanitized converts a string to a valid Go identifier.
  71  func GoSanitized(s string) string {
  72  	// Sanitize the input to the set of valid characters,
  73  	// which must be '_' or be in the Unicode L or N categories.
  74  	s = strings.Map(func(r rune) rune {
  75  		if unicode.IsLetter(r) || unicode.IsDigit(r) {
  76  			return r
  77  		}
  78  		return '_'
  79  	}, s)
  80  
  81  	// Prepend '_' in the event of a Go keyword conflict or if
  82  	// the identifier is invalid (does not start in the Unicode L category).
  83  	r, _ := utf8.DecodeRuneInString(s)
  84  	if token.Lookup(s).IsKeyword() || !unicode.IsLetter(r) {
  85  		return "_" + s
  86  	}
  87  	return s
  88  }
  89  
  90  // JSONCamelCase converts a snake_case identifier to a camelCase identifier,
  91  // according to the protobuf JSON specification.
  92  func JSONCamelCase(s string) string {
  93  	var b []byte
  94  	var wasUnderscore bool
  95  	for i := 0; i < len(s); i++ { // proto identifiers are always ASCII
  96  		c := s[i]
  97  		if c != '_' {
  98  			if wasUnderscore && isASCIILower(c) {
  99  				c -= 'a' - 'A' // convert to uppercase
 100  			}
 101  			b = append(b, c)
 102  		}
 103  		wasUnderscore = c == '_'
 104  	}
 105  	return string(b)
 106  }
 107  
 108  // JSONSnakeCase converts a camelCase identifier to a snake_case identifier,
 109  // according to the protobuf JSON specification.
 110  func JSONSnakeCase(s string) string {
 111  	var b []byte
 112  	for i := 0; i < len(s); i++ { // proto identifiers are always ASCII
 113  		c := s[i]
 114  		if isASCIIUpper(c) {
 115  			b = append(b, '_')
 116  			c += 'a' - 'A' // convert to lowercase
 117  		}
 118  		b = append(b, c)
 119  	}
 120  	return string(b)
 121  }
 122  
 123  // MapEntryName derives the name of the map entry message given the field name.
 124  // See protoc v3.8.0: src/google/protobuf/descriptor.cc:254-276,6057
 125  func MapEntryName(s string) string {
 126  	var b []byte
 127  	upperNext := true
 128  	for _, c := range s {
 129  		switch {
 130  		case c == '_':
 131  			upperNext = true
 132  		case upperNext:
 133  			b = append(b, byte(unicode.ToUpper(c)))
 134  			upperNext = false
 135  		default:
 136  			b = append(b, byte(c))
 137  		}
 138  	}
 139  	b = append(b, "Entry"...)
 140  	return string(b)
 141  }
 142  
 143  // EnumValueName derives the camel-cased enum value name.
 144  // See protoc v3.8.0: src/google/protobuf/descriptor.cc:297-313
 145  func EnumValueName(s string) string {
 146  	var b []byte
 147  	upperNext := true
 148  	for _, c := range s {
 149  		switch {
 150  		case c == '_':
 151  			upperNext = true
 152  		case upperNext:
 153  			b = append(b, byte(unicode.ToUpper(c)))
 154  			upperNext = false
 155  		default:
 156  			b = append(b, byte(unicode.ToLower(c)))
 157  			upperNext = false
 158  		}
 159  	}
 160  	return string(b)
 161  }
 162  
 163  // TrimEnumPrefix trims the enum name prefix from an enum value name,
 164  // where the prefix is all lowercase without underscores.
 165  // See protoc v3.8.0: src/google/protobuf/descriptor.cc:330-375
 166  func TrimEnumPrefix(s, prefix string) string {
 167  	s0 := s // original input
 168  	for len(s) > 0 && len(prefix) > 0 {
 169  		if s[0] == '_' {
 170  			s = s[1:]
 171  			continue
 172  		}
 173  		if unicode.ToLower(rune(s[0])) != rune(prefix[0]) {
 174  			return s0 // no prefix match
 175  		}
 176  		s, prefix = s[1:], prefix[1:]
 177  	}
 178  	if len(prefix) > 0 {
 179  		return s0 // no prefix match
 180  	}
 181  	s = strings.TrimLeft(s, "_")
 182  	if len(s) == 0 {
 183  		return s0 // avoid returning empty string
 184  	}
 185  	return s
 186  }
 187  
 188  func isASCIILower(c byte) bool {
 189  	return 'a' <= c && c <= 'z'
 190  }
 191  func isASCIIUpper(c byte) bool {
 192  	return 'A' <= c && c <= 'Z'
 193  }
 194  func isASCIIDigit(c byte) bool {
 195  	return '0' <= c && c <= '9'
 196  }
 197