match.go raw

   1  // Copyright 2013 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  package language
   6  
   7  import "errors"
   8  
   9  type scriptRegionFlags uint8
  10  
  11  const (
  12  	isList = 1 << iota
  13  	scriptInFrom
  14  	regionInFrom
  15  )
  16  
  17  func (t *Tag) setUndefinedLang(id Language) {
  18  	if t.LangID == 0 {
  19  		t.LangID = id
  20  	}
  21  }
  22  
  23  func (t *Tag) setUndefinedScript(id Script) {
  24  	if t.ScriptID == 0 {
  25  		t.ScriptID = id
  26  	}
  27  }
  28  
  29  func (t *Tag) setUndefinedRegion(id Region) {
  30  	if t.RegionID == 0 || t.RegionID.Contains(id) {
  31  		t.RegionID = id
  32  	}
  33  }
  34  
  35  // ErrMissingLikelyTagsData indicates no information was available
  36  // to compute likely values of missing tags.
  37  var ErrMissingLikelyTagsData = errors.New("missing likely tags data")
  38  
  39  // addLikelySubtags sets subtags to their most likely value, given the locale.
  40  // In most cases this means setting fields for unknown values, but in some
  41  // cases it may alter a value.  It returns an ErrMissingLikelyTagsData error
  42  // if the given locale cannot be expanded.
  43  func (t Tag) addLikelySubtags() (Tag, error) {
  44  	id, err := addTags(t)
  45  	if err != nil {
  46  		return t, err
  47  	} else if id.equalTags(t) {
  48  		return t, nil
  49  	}
  50  	id.RemakeString()
  51  	return id, nil
  52  }
  53  
  54  // specializeRegion attempts to specialize a group region.
  55  func specializeRegion(t *Tag) bool {
  56  	if i := regionInclusion[t.RegionID]; i < nRegionGroups {
  57  		x := likelyRegionGroup[i]
  58  		if Language(x.lang) == t.LangID && Script(x.script) == t.ScriptID {
  59  			t.RegionID = Region(x.region)
  60  		}
  61  		return true
  62  	}
  63  	return false
  64  }
  65  
  66  // Maximize returns a new tag with missing tags filled in.
  67  func (t Tag) Maximize() (Tag, error) {
  68  	return addTags(t)
  69  }
  70  
  71  func addTags(t Tag) (Tag, error) {
  72  	// We leave private use identifiers alone.
  73  	if t.IsPrivateUse() {
  74  		return t, nil
  75  	}
  76  	if t.ScriptID != 0 && t.RegionID != 0 {
  77  		if t.LangID != 0 {
  78  			// already fully specified
  79  			specializeRegion(&t)
  80  			return t, nil
  81  		}
  82  		// Search matches for und-script-region. Note that for these cases
  83  		// region will never be a group so there is no need to check for this.
  84  		list := likelyRegion[t.RegionID : t.RegionID+1]
  85  		if x := list[0]; x.flags&isList != 0 {
  86  			list = likelyRegionList[x.lang : x.lang+uint16(x.script)]
  87  		}
  88  		for _, x := range list {
  89  			// Deviating from the spec. See match_test.go for details.
  90  			if Script(x.script) == t.ScriptID {
  91  				t.setUndefinedLang(Language(x.lang))
  92  				return t, nil
  93  			}
  94  		}
  95  	}
  96  	if t.LangID != 0 {
  97  		// Search matches for lang-script and lang-region, where lang != und.
  98  		if t.LangID < langNoIndexOffset {
  99  			x := likelyLang[t.LangID]
 100  			if x.flags&isList != 0 {
 101  				list := likelyLangList[x.region : x.region+uint16(x.script)]
 102  				if t.ScriptID != 0 {
 103  					for _, x := range list {
 104  						if Script(x.script) == t.ScriptID && x.flags&scriptInFrom != 0 {
 105  							t.setUndefinedRegion(Region(x.region))
 106  							return t, nil
 107  						}
 108  					}
 109  				} else if t.RegionID != 0 {
 110  					count := 0
 111  					goodScript := true
 112  					tt := t
 113  					for _, x := range list {
 114  						// We visit all entries for which the script was not
 115  						// defined, including the ones where the region was not
 116  						// defined. This allows for proper disambiguation within
 117  						// regions.
 118  						if x.flags&scriptInFrom == 0 && t.RegionID.Contains(Region(x.region)) {
 119  							tt.RegionID = Region(x.region)
 120  							tt.setUndefinedScript(Script(x.script))
 121  							goodScript = goodScript && tt.ScriptID == Script(x.script)
 122  							count++
 123  						}
 124  					}
 125  					if count == 1 {
 126  						return tt, nil
 127  					}
 128  					// Even if we fail to find a unique Region, we might have
 129  					// an unambiguous script.
 130  					if goodScript {
 131  						t.ScriptID = tt.ScriptID
 132  					}
 133  				}
 134  			}
 135  		}
 136  	} else {
 137  		// Search matches for und-script.
 138  		if t.ScriptID != 0 {
 139  			x := likelyScript[t.ScriptID]
 140  			if x.region != 0 {
 141  				t.setUndefinedRegion(Region(x.region))
 142  				t.setUndefinedLang(Language(x.lang))
 143  				return t, nil
 144  			}
 145  		}
 146  		// Search matches for und-region. If und-script-region exists, it would
 147  		// have been found earlier.
 148  		if t.RegionID != 0 {
 149  			if i := regionInclusion[t.RegionID]; i < nRegionGroups {
 150  				x := likelyRegionGroup[i]
 151  				if x.region != 0 {
 152  					t.setUndefinedLang(Language(x.lang))
 153  					t.setUndefinedScript(Script(x.script))
 154  					t.RegionID = Region(x.region)
 155  				}
 156  			} else {
 157  				x := likelyRegion[t.RegionID]
 158  				if x.flags&isList != 0 {
 159  					x = likelyRegionList[x.lang]
 160  				}
 161  				if x.script != 0 && x.flags != scriptInFrom {
 162  					t.setUndefinedLang(Language(x.lang))
 163  					t.setUndefinedScript(Script(x.script))
 164  					return t, nil
 165  				}
 166  			}
 167  		}
 168  	}
 169  
 170  	// Search matches for lang.
 171  	if t.LangID < langNoIndexOffset {
 172  		x := likelyLang[t.LangID]
 173  		if x.flags&isList != 0 {
 174  			x = likelyLangList[x.region]
 175  		}
 176  		if x.region != 0 {
 177  			t.setUndefinedScript(Script(x.script))
 178  			t.setUndefinedRegion(Region(x.region))
 179  		}
 180  		specializeRegion(&t)
 181  		if t.LangID == 0 {
 182  			t.LangID = _en // default language
 183  		}
 184  		return t, nil
 185  	}
 186  	return t, ErrMissingLikelyTagsData
 187  }
 188  
 189  func (t *Tag) setTagsFrom(id Tag) {
 190  	t.LangID = id.LangID
 191  	t.ScriptID = id.ScriptID
 192  	t.RegionID = id.RegionID
 193  }
 194  
 195  // minimize removes the region or script subtags from t such that
 196  // t.addLikelySubtags() == t.minimize().addLikelySubtags().
 197  func (t Tag) minimize() (Tag, error) {
 198  	t, err := minimizeTags(t)
 199  	if err != nil {
 200  		return t, err
 201  	}
 202  	t.RemakeString()
 203  	return t, nil
 204  }
 205  
 206  // minimizeTags mimics the behavior of the ICU 51 C implementation.
 207  func minimizeTags(t Tag) (Tag, error) {
 208  	if t.equalTags(Und) {
 209  		return t, nil
 210  	}
 211  	max, err := addTags(t)
 212  	if err != nil {
 213  		return t, err
 214  	}
 215  	for _, id := range [...]Tag{
 216  		{LangID: t.LangID},
 217  		{LangID: t.LangID, RegionID: t.RegionID},
 218  		{LangID: t.LangID, ScriptID: t.ScriptID},
 219  	} {
 220  		if x, err := addTags(id); err == nil && max.equalTags(x) {
 221  			t.setTagsFrom(id)
 222  			break
 223  		}
 224  	}
 225  	return t, nil
 226  }
 227