1 // Copyright 2011 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 5 package unicode
// Property bits recorded for every code point below U+0100, so that the
// Latin-1 range can be classified with a single table lookup.
const (
	pC  = 1 << iota // control character
	pP              // punctuation character
	pN              // numeral
	pS              // symbolic character
	pZ              // spacing character
	pLu             // upper-case letter
	pLl             // lower-case letter
	pp              // printable character, by Go's definition
)

// Masks derived from the single-bit properties above.
const (
	// pg marks a graphical character by the Unicode definition:
	// anything printable or spacing.
	pg = pp | pZ
	// pLo has both case bits set, which denotes a letter that is neither
	// upper nor lower case.
	pLo = pLl | pLu
	// pLmask selects the letter bits.
	pLmask = pLo
)
21 22 // GraphicRanges defines the set of graphic characters according to Unicode.
23 var GraphicRanges = []*RangeTable{
24 L, M, N, P, S, Zs,
25 }
26 27 // PrintRanges defines the set of printable characters according to Go.
28 // ASCII space, U+0020, is handled separately.
29 var PrintRanges = []*RangeTable{
30 L, M, N, P, S,
31 }
32 33 // IsGraphic reports whether the rune is defined as a Graphic by Unicode.
34 // Such characters include letters, marks, numbers, punctuation, symbols, and
35 // spaces, from categories [L], [M], [N], [P], [S], [Zs].
36 func IsGraphic(r rune) bool {
37 // We convert to uint32 to avoid the extra test for negative,
38 // and in the index we convert to uint8 to avoid the range check.
39 if uint32(r) <= MaxLatin1 {
40 return properties[uint8(r)]&pg != 0
41 }
42 return In(r, GraphicRanges...)
43 }
44 45 // IsPrint reports whether the rune is defined as printable by Go. Such
46 // characters include letters, marks, numbers, punctuation, symbols, and the
47 // ASCII space character, from categories [L], [M], [N], [P], [S] and the ASCII space
48 // character. This categorization is the same as [IsGraphic] except that the
49 // only spacing character is ASCII space, U+0020.
50 func IsPrint(r rune) bool {
51 if uint32(r) <= MaxLatin1 {
52 return properties[uint8(r)]&pp != 0
53 }
54 return In(r, PrintRanges...)
55 }
56 57 // IsOneOf reports whether the rune is a member of one of the ranges.
58 // The function "In" provides a nicer signature and should be used in preference to IsOneOf.
59 func IsOneOf(ranges []*RangeTable, r rune) bool {
60 for _, inside := range ranges {
61 if Is(inside, r) {
62 return true
63 }
64 }
65 return false
66 }
67 68 // In reports whether the rune is a member of one of the ranges.
69 func In(r rune, ranges ...*RangeTable) bool {
70 for _, inside := range ranges {
71 if Is(inside, r) {
72 return true
73 }
74 }
75 return false
76 }
77 78 // IsControl reports whether the rune is a control character.
79 // The [C] ([Other]) Unicode category includes more code points
80 // such as surrogates; use [Is](C, r) to test for them.
81 func IsControl(r rune) bool {
82 if uint32(r) <= MaxLatin1 {
83 return properties[uint8(r)]&pC != 0
84 }
85 // All control characters are < MaxLatin1.
86 return false
87 }
88 89 // IsLetter reports whether the rune is a letter (category [L]).
90 func IsLetter(r rune) bool {
91 if uint32(r) <= MaxLatin1 {
92 return properties[uint8(r)]&(pLmask) != 0
93 }
94 return isExcludingLatin(Letter, r)
95 }
96 97 // IsMark reports whether the rune is a mark character (category [M]).
98 func IsMark(r rune) bool {
99 // There are no mark characters in Latin-1.
100 return isExcludingLatin(Mark, r)
101 }
102 103 // IsNumber reports whether the rune is a number (category [N]).
104 func IsNumber(r rune) bool {
105 if uint32(r) <= MaxLatin1 {
106 return properties[uint8(r)]&pN != 0
107 }
108 return isExcludingLatin(Number, r)
109 }
110 111 // IsPunct reports whether the rune is a Unicode punctuation character
112 // (category [P]).
113 func IsPunct(r rune) bool {
114 if uint32(r) <= MaxLatin1 {
115 return properties[uint8(r)]&pP != 0
116 }
117 return Is(Punct, r)
118 }
119 120 // IsSpace reports whether the rune is a space character as defined
121 // by Unicode's White Space property; in the Latin-1 space
122 // this is
123 //
124 // '\t', '\n', '\v', '\f', '\r', ' ', U+0085 (NEL), U+00A0 (NBSP).
125 //
126 // Other definitions of spacing characters are set by category
127 // Z and property [Pattern_White_Space].
128 func IsSpace(r rune) bool {
129 // This property isn't the same as Z; special-case it.
130 if uint32(r) <= MaxLatin1 {
131 switch r {
132 case '\t', '\n', '\v', '\f', '\r', ' ', 0x85, 0xA0:
133 return true
134 }
135 return false
136 }
137 return isExcludingLatin(White_Space, r)
138 }
139 140 // IsSymbol reports whether the rune is a symbolic character.
141 func IsSymbol(r rune) bool {
142 if uint32(r) <= MaxLatin1 {
143 return properties[uint8(r)]&pS != 0
144 }
145 return isExcludingLatin(Symbol, r)
146 }
147