language.go raw

   1  // Copyright 2013 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  //go:generate go run gen.go gen_index.go -output tables.go
   6  //go:generate go run gen_parents.go
   7  
   8  package compact
   9  
  10  // TODO: Remove above NOTE after:
  11  // - verifying that tables are dropped correctly (most notably matcher tables).
  12  
  13  import (
  14  	"strings"
  15  
  16  	"golang.org/x/text/internal/language"
  17  )
  18  
  19  // Tag represents a BCP 47 language tag. It is used to specify an instance of a
  20  // specific language or locale. All language tag values are guaranteed to be
  21  // well-formed.
  22  type Tag struct {
  23  	// NOTE: exported tags will become part of the public API.
  24  	language ID
  25  	locale   ID
  26  	full     fullTag // always a language.Tag for now.
  27  }
  28  
  29  const _und = 0
  30  
  31  type fullTag interface {
  32  	IsRoot() bool
  33  	Parent() language.Tag
  34  }
  35  
  36  // Make a compact Tag from a fully specified internal language Tag.
  37  func Make(t language.Tag) (tag Tag) {
  38  	if region := t.TypeForKey("rg"); len(region) == 6 && region[2:] == "zzzz" {
  39  		if r, err := language.ParseRegion(region[:2]); err == nil {
  40  			tFull := t
  41  			t, _ = t.SetTypeForKey("rg", "")
  42  			// TODO: should we not consider "va" for the language tag?
  43  			var exact1, exact2 bool
  44  			tag.language, exact1 = FromTag(t)
  45  			t.RegionID = r
  46  			tag.locale, exact2 = FromTag(t)
  47  			if !exact1 || !exact2 {
  48  				tag.full = tFull
  49  			}
  50  			return tag
  51  		}
  52  	}
  53  	lang, ok := FromTag(t)
  54  	tag.language = lang
  55  	tag.locale = lang
  56  	if !ok {
  57  		tag.full = t
  58  	}
  59  	return tag
  60  }
  61  
  62  // Tag returns an internal language Tag version of this tag.
  63  func (t Tag) Tag() language.Tag {
  64  	if t.full != nil {
  65  		return t.full.(language.Tag)
  66  	}
  67  	tag := t.language.Tag()
  68  	if t.language != t.locale {
  69  		loc := t.locale.Tag()
  70  		tag, _ = tag.SetTypeForKey("rg", strings.ToLower(loc.RegionID.String())+"zzzz")
  71  	}
  72  	return tag
  73  }
  74  
  75  // IsCompact reports whether this tag is fully defined in terms of ID.
  76  func (t *Tag) IsCompact() bool {
  77  	return t.full == nil
  78  }
  79  
  80  // MayHaveVariants reports whether a tag may have variants. If it returns false
  81  // it is guaranteed the tag does not have variants.
  82  func (t Tag) MayHaveVariants() bool {
  83  	return t.full != nil || int(t.language) >= len(coreTags)
  84  }
  85  
  86  // MayHaveExtensions reports whether a tag may have extensions. If it returns
  87  // false it is guaranteed the tag does not have them.
  88  func (t Tag) MayHaveExtensions() bool {
  89  	return t.full != nil ||
  90  		int(t.language) >= len(coreTags) ||
  91  		t.language != t.locale
  92  }
  93  
  94  // IsRoot returns true if t is equal to language "und".
  95  func (t Tag) IsRoot() bool {
  96  	if t.full != nil {
  97  		return t.full.IsRoot()
  98  	}
  99  	return t.language == _und
 100  }
 101  
 102  // Parent returns the CLDR parent of t. In CLDR, missing fields in data for a
 103  // specific language are substituted with fields from the parent language.
 104  // The parent for a language may change for newer versions of CLDR.
 105  func (t Tag) Parent() Tag {
 106  	if t.full != nil {
 107  		return Make(t.full.Parent())
 108  	}
 109  	if t.language != t.locale {
 110  		// Simulate stripping -u-rg-xxxxxx
 111  		return Tag{language: t.language, locale: t.language}
 112  	}
 113  	// TODO: use parent lookup table once cycle from internal package is
 114  	// removed. Probably by internalizing the table and declaring this fast
 115  	// enough.
 116  	// lang := compactID(internal.Parent(uint16(t.language)))
 117  	lang, _ := FromTag(t.language.Tag().Parent())
 118  	return Tag{language: lang, locale: lang}
 119  }
 120  
 121  // nextToken returns token t and the rest of the string.
 122  func nextToken(s string) (t, tail string) {
 123  	p := strings.Index(s[1:], "-")
 124  	if p == -1 {
 125  		return s[1:], ""
 126  	}
 127  	p++
 128  	return s[1:p], s[p:]
 129  }
 130  
 131  // LanguageID returns an index, where 0 <= index < NumCompactTags, for tags
 132  // for which data exists in the text repository.The index will change over time
 133  // and should not be stored in persistent storage. If t does not match a compact
 134  // index, exact will be false and the compact index will be returned for the
 135  // first match after repeatedly taking the Parent of t.
 136  func LanguageID(t Tag) (id ID, exact bool) {
 137  	return t.language, t.full == nil
 138  }
 139  
 140  // RegionalID returns the ID for the regional variant of this tag. This index is
 141  // used to indicate region-specific overrides, such as default currency, default
 142  // calendar and week data, default time cycle, and default measurement system
 143  // and unit preferences.
 144  //
 145  // For instance, the tag en-GB-u-rg-uszzzz specifies British English with US
 146  // settings for currency, number formatting, etc. The CompactIndex for this tag
 147  // will be that for en-GB, while the RegionalID will be the one corresponding to
 148  // en-US.
 149  func RegionalID(t Tag) (id ID, exact bool) {
 150  	return t.locale, t.full == nil
 151  }
 152  
 153  // LanguageTag returns t stripped of regional variant indicators.
 154  //
 155  // At the moment this means it is stripped of a regional and variant subtag "rg"
 156  // and "va" in the "u" extension.
 157  func (t Tag) LanguageTag() Tag {
 158  	if t.full == nil {
 159  		return Tag{language: t.language, locale: t.language}
 160  	}
 161  	tt := t.Tag()
 162  	tt.SetTypeForKey("rg", "")
 163  	tt.SetTypeForKey("va", "")
 164  	return Make(tt)
 165  }
 166  
 167  // RegionalTag returns the regional variant of the tag.
 168  //
 169  // At the moment this means that the region is set from the regional subtag
 170  // "rg" in the "u" extension.
 171  func (t Tag) RegionalTag() Tag {
 172  	rt := Tag{language: t.locale, locale: t.locale}
 173  	if t.full == nil {
 174  		return rt
 175  	}
 176  	b := language.Builder{}
 177  	tag := t.Tag()
 178  	// tag, _ = tag.SetTypeForKey("rg", "")
 179  	b.SetTag(t.locale.Tag())
 180  	if v := tag.Variants(); v != "" {
 181  		for _, v := range strings.Split(v, "-") {
 182  			b.AddVariant(v)
 183  		}
 184  	}
 185  	for _, e := range tag.Extensions() {
 186  		b.AddExt(e)
 187  	}
 188  	return t
 189  }
 190  
 191  // FromTag reports closest matching ID for an internal language Tag.
 192  func FromTag(t language.Tag) (id ID, exact bool) {
 193  	// TODO: perhaps give more frequent tags a lower index.
 194  	// TODO: we could make the indexes stable. This will excluded some
 195  	//       possibilities for optimization, so don't do this quite yet.
 196  	exact = true
 197  
 198  	b, s, r := t.Raw()
 199  	if t.HasString() {
 200  		if t.IsPrivateUse() {
 201  			// We have no entries for user-defined tags.
 202  			return 0, false
 203  		}
 204  		hasExtra := false
 205  		if t.HasVariants() {
 206  			if t.HasExtensions() {
 207  				build := language.Builder{}
 208  				build.SetTag(language.Tag{LangID: b, ScriptID: s, RegionID: r})
 209  				build.AddVariant(t.Variants())
 210  				exact = false
 211  				t = build.Make()
 212  			}
 213  			hasExtra = true
 214  		} else if _, ok := t.Extension('u'); ok {
 215  			// TODO: va may mean something else. Consider not considering it.
 216  			// Strip all but the 'va' entry.
 217  			old := t
 218  			variant := t.TypeForKey("va")
 219  			t = language.Tag{LangID: b, ScriptID: s, RegionID: r}
 220  			if variant != "" {
 221  				t, _ = t.SetTypeForKey("va", variant)
 222  				hasExtra = true
 223  			}
 224  			exact = old == t
 225  		} else {
 226  			exact = false
 227  		}
 228  		if hasExtra {
 229  			// We have some variants.
 230  			for i, s := range specialTags {
 231  				if s == t {
 232  					return ID(i + len(coreTags)), exact
 233  				}
 234  			}
 235  			exact = false
 236  		}
 237  	}
 238  	if x, ok := getCoreIndex(t); ok {
 239  		return x, exact
 240  	}
 241  	exact = false
 242  	if r != 0 && s == 0 {
 243  		// Deal with cases where an extra script is inserted for the region.
 244  		t, _ := t.Maximize()
 245  		if x, ok := getCoreIndex(t); ok {
 246  			return x, exact
 247  		}
 248  	}
 249  	for t = t.Parent(); t != root; t = t.Parent() {
 250  		// No variants specified: just compare core components.
 251  		// The key has the form lllssrrr, where l, s, and r are nibbles for
 252  		// respectively the langID, scriptID, and regionID.
 253  		if x, ok := getCoreIndex(t); ok {
 254  			return x, exact
 255  		}
 256  	}
 257  	return 0, exact
 258  }
 259  
 260  var root = language.Tag{}
 261