graphic.mx raw

   1  // Copyright 2011 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  package unicode
   6  
   7  // Bit masks for each code point under U+0100, for fast lookup.
   8  const (
   9  	pC     = 1 << iota // a control character.
  10  	pP                 // a punctuation character.
  11  	pN                 // a numeral.
  12  	pS                 // a symbolic character.
  13  	pZ                 // a spacing character.
  14  	pLu                // an upper-case letter.
  15  	pLl                // a lower-case letter.
  16  	pp                 // a printable character according to Go's definition.
  17  	pg     = pp | pZ   // a graphical character according to the Unicode definition.
  18  	pLo    = pLl | pLu // a letter that is neither upper nor lower case.
  19  	pLmask = pLo
  20  )
  21  
  22  // GraphicRanges defines the set of graphic characters according to Unicode.
  23  var GraphicRanges = []*RangeTable{
  24  	L, M, N, P, S, Zs,
  25  }
  26  
  27  // PrintRanges defines the set of printable characters according to Go.
  28  // ASCII space, U+0020, is handled separately.
  29  var PrintRanges = []*RangeTable{
  30  	L, M, N, P, S,
  31  }
  32  
  33  // IsGraphic reports whether the rune is defined as a Graphic by Unicode.
  34  // Such characters include letters, marks, numbers, punctuation, symbols, and
  35  // spaces, from categories [L], [M], [N], [P], [S], [Zs].
  36  func IsGraphic(r rune) bool {
  37  	// We convert to uint32 to avoid the extra test for negative,
  38  	// and in the index we convert to uint8 to avoid the range check.
  39  	if uint32(r) <= MaxLatin1 {
  40  		return properties[uint8(r)]&pg != 0
  41  	}
  42  	return In(r, GraphicRanges...)
  43  }
  44  
  45  // IsPrint reports whether the rune is defined as printable by Go. Such
  46  // characters include letters, marks, numbers, punctuation, symbols, and the
  47  // ASCII space character, from categories [L], [M], [N], [P], [S] and the ASCII space
  48  // character. This categorization is the same as [IsGraphic] except that the
  49  // only spacing character is ASCII space, U+0020.
  50  func IsPrint(r rune) bool {
  51  	if uint32(r) <= MaxLatin1 {
  52  		return properties[uint8(r)]&pp != 0
  53  	}
  54  	return In(r, PrintRanges...)
  55  }
  56  
  57  // IsOneOf reports whether the rune is a member of one of the ranges.
  58  // The function "In" provides a nicer signature and should be used in preference to IsOneOf.
  59  func IsOneOf(ranges []*RangeTable, r rune) bool {
  60  	for _, inside := range ranges {
  61  		if Is(inside, r) {
  62  			return true
  63  		}
  64  	}
  65  	return false
  66  }
  67  
  68  // In reports whether the rune is a member of one of the ranges.
  69  func In(r rune, ranges ...*RangeTable) bool {
  70  	for _, inside := range ranges {
  71  		if Is(inside, r) {
  72  			return true
  73  		}
  74  	}
  75  	return false
  76  }
  77  
  78  // IsControl reports whether the rune is a control character.
  79  // The [C] ([Other]) Unicode category includes more code points
  80  // such as surrogates; use [Is](C, r) to test for them.
  81  func IsControl(r rune) bool {
  82  	if uint32(r) <= MaxLatin1 {
  83  		return properties[uint8(r)]&pC != 0
  84  	}
  85  	// All control characters are < MaxLatin1.
  86  	return false
  87  }
  88  
  89  // IsLetter reports whether the rune is a letter (category [L]).
  90  func IsLetter(r rune) bool {
  91  	if uint32(r) <= MaxLatin1 {
  92  		return properties[uint8(r)]&(pLmask) != 0
  93  	}
  94  	return isExcludingLatin(Letter, r)
  95  }
  96  
// IsMark reports whether the rune is a mark character (category [M]).
func IsMark(r rune) bool {
	// There are no mark characters in Latin-1, so unlike its siblings this
	// function has no Latin-1 fast path and goes straight to the table.
	return isExcludingLatin(Mark, r)
}
 102  
 103  // IsNumber reports whether the rune is a number (category [N]).
 104  func IsNumber(r rune) bool {
 105  	if uint32(r) <= MaxLatin1 {
 106  		return properties[uint8(r)]&pN != 0
 107  	}
 108  	return isExcludingLatin(Number, r)
 109  }
 110  
 111  // IsPunct reports whether the rune is a Unicode punctuation character
 112  // (category [P]).
 113  func IsPunct(r rune) bool {
 114  	if uint32(r) <= MaxLatin1 {
 115  		return properties[uint8(r)]&pP != 0
 116  	}
 117  	return Is(Punct, r)
 118  }
 119  
 120  // IsSpace reports whether the rune is a space character as defined
 121  // by Unicode's White Space property; in the Latin-1 space
 122  // this is
 123  //
 124  //	'\t', '\n', '\v', '\f', '\r', ' ', U+0085 (NEL), U+00A0 (NBSP).
 125  //
 126  // Other definitions of spacing characters are set by category
 127  // Z and property [Pattern_White_Space].
 128  func IsSpace(r rune) bool {
 129  	// This property isn't the same as Z; special-case it.
 130  	if uint32(r) <= MaxLatin1 {
 131  		switch r {
 132  		case '\t', '\n', '\v', '\f', '\r', ' ', 0x85, 0xA0:
 133  			return true
 134  		}
 135  		return false
 136  	}
 137  	return isExcludingLatin(White_Space, r)
 138  }
 139  
 140  // IsSymbol reports whether the rune is a symbolic character.
 141  func IsSymbol(r rune) bool {
 142  	if uint32(r) <= MaxLatin1 {
 143  		return properties[uint8(r)]&pS != 0
 144  	}
 145  	return isExcludingLatin(Symbol, r)
 146  }
 147