...
Source file src/unicode/letter.go
1
2
3
4
5
6
7 package unicode
8
// Rune boundary values used throughout this file, listed in ascending order.
const (
	// MaxASCII is the upper bound of the ASCII fast paths in ToUpper,
	// ToLower and ToTitle.
	MaxASCII = '\u007F'
	// MaxLatin1 is the upper bound of the Latin-1 fast paths in IsUpper,
	// IsLower, IsTitle and is16.
	MaxLatin1 = '\u00FF'
	// ReplacementChar is what the case-mapping helper returns when asked
	// for a case index outside [0, MaxCase).
	ReplacementChar = '\uFFFD'
	// MaxRune is the largest rune SimpleFold will attempt to fold; a case
	// delta above it is treated as a marker rather than an offset.
	MaxRune = '\U0010FFFF'
)
15
16
17
18
19
20
21 type RangeTable struct {
22 R16 []Range16
23 R32 []Range32
24 LatinOffset int
25 }
26
27
28
// Range16 is a range of 16-bit code points. A value r belongs to it when
// Lo <= r <= Hi and r-Lo is a multiple of Stride.
type Range16 struct {
	Lo, Hi, Stride uint16
}
34
35
36
37
// Range32 is a range of 32-bit code points. A value r belongs to it when
// Lo <= r <= Hi and r-Lo is a multiple of Stride.
type Range32 struct {
	Lo, Hi, Stride uint32
}
43
44
45
46
47
48
49
50
51
52
53
54 type CaseRange struct {
55 Lo uint32
56 Hi uint32
57 Delta d
58 }
59
60
61
62 type SpecialCase []CaseRange
63
64
65
66
67
// Indices into the Delta array of a CaseRange.
const (
	UpperCase = iota // 0
	LowerCase        // 1
	TitleCase        // 2
	MaxCase          // 3: number of cases, used as the length of d
)
74
75 type d [MaxCase]rune
76
77
78
79
80 const (
81 UpperLower = MaxRune + 1
82 )
83
84
85
// linearMax is the largest table length that is16 and is32 scan linearly;
// longer tables are binary-searched.
const linearMax = 18
87
88
89 func is16(ranges []Range16, r uint16) bool {
90 if len(ranges) <= linearMax || r <= MaxLatin1 {
91 for i := range ranges {
92 range_ := &ranges[i]
93 if r < range_.Lo {
94 return false
95 }
96 if r <= range_.Hi {
97 return range_.Stride == 1 || (r-range_.Lo)%range_.Stride == 0
98 }
99 }
100 return false
101 }
102
103
104 lo := 0
105 hi := len(ranges)
106 for lo < hi {
107 m := lo + (hi-lo)/2
108 range_ := &ranges[m]
109 if range_.Lo <= r && r <= range_.Hi {
110 return range_.Stride == 1 || (r-range_.Lo)%range_.Stride == 0
111 }
112 if r < range_.Lo {
113 hi = m
114 } else {
115 lo = m + 1
116 }
117 }
118 return false
119 }
120
121
122 func is32(ranges []Range32, r uint32) bool {
123 if len(ranges) <= linearMax {
124 for i := range ranges {
125 range_ := &ranges[i]
126 if r < range_.Lo {
127 return false
128 }
129 if r <= range_.Hi {
130 return range_.Stride == 1 || (r-range_.Lo)%range_.Stride == 0
131 }
132 }
133 return false
134 }
135
136
137 lo := 0
138 hi := len(ranges)
139 for lo < hi {
140 m := lo + (hi-lo)/2
141 range_ := ranges[m]
142 if range_.Lo <= r && r <= range_.Hi {
143 return range_.Stride == 1 || (r-range_.Lo)%range_.Stride == 0
144 }
145 if r < range_.Lo {
146 hi = m
147 } else {
148 lo = m + 1
149 }
150 }
151 return false
152 }
153
154
155 func Is(rangeTab *RangeTable, r rune) bool {
156 r16 := rangeTab.R16
157 if len(r16) > 0 && r <= rune(r16[len(r16)-1].Hi) {
158 return is16(r16, uint16(r))
159 }
160 r32 := rangeTab.R32
161 if len(r32) > 0 && r >= rune(r32[0].Lo) {
162 return is32(r32, uint32(r))
163 }
164 return false
165 }
166
167 func isExcludingLatin(rangeTab *RangeTable, r rune) bool {
168 r16 := rangeTab.R16
169 if off := rangeTab.LatinOffset; len(r16) > off && r <= rune(r16[len(r16)-1].Hi) {
170 return is16(r16[off:], uint16(r))
171 }
172 r32 := rangeTab.R32
173 if len(r32) > 0 && r >= rune(r32[0].Lo) {
174 return is32(r32, uint32(r))
175 }
176 return false
177 }
178
179
180 func IsUpper(r rune) bool {
181
182 if uint32(r) <= MaxLatin1 {
183 return properties[uint8(r)]&pLmask == pLu
184 }
185 return isExcludingLatin(Upper, r)
186 }
187
188
189 func IsLower(r rune) bool {
190
191 if uint32(r) <= MaxLatin1 {
192 return properties[uint8(r)]&pLmask == pLl
193 }
194 return isExcludingLatin(Lower, r)
195 }
196
197
198 func IsTitle(r rune) bool {
199 if r <= MaxLatin1 {
200 return false
201 }
202 return isExcludingLatin(Title, r)
203 }
204
205
206
207 func to(_case int, r rune, caseRange []CaseRange) (mappedRune rune, foundMapping bool) {
208 if _case < 0 || MaxCase <= _case {
209 return ReplacementChar, false
210 }
211
212 lo := 0
213 hi := len(caseRange)
214 for lo < hi {
215 m := lo + (hi-lo)/2
216 cr := caseRange[m]
217 if rune(cr.Lo) <= r && r <= rune(cr.Hi) {
218 delta := cr.Delta[_case]
219 if delta > MaxRune {
220
221
222
223
224
225
226
227
228
229
230 return rune(cr.Lo) + ((r-rune(cr.Lo))&^1 | rune(_case&1)), true
231 }
232 return r + delta, true
233 }
234 if r < rune(cr.Lo) {
235 hi = m
236 } else {
237 lo = m + 1
238 }
239 }
240 return r, false
241 }
242
243
244 func To(_case int, r rune) rune {
245 r, _ = to(_case, r, CaseRanges)
246 return r
247 }
248
249
250 func ToUpper(r rune) rune {
251 if r <= MaxASCII {
252 if 'a' <= r && r <= 'z' {
253 r -= 'a' - 'A'
254 }
255 return r
256 }
257 return To(UpperCase, r)
258 }
259
260
261 func ToLower(r rune) rune {
262 if r <= MaxASCII {
263 if 'A' <= r && r <= 'Z' {
264 r += 'a' - 'A'
265 }
266 return r
267 }
268 return To(LowerCase, r)
269 }
270
271
272 func ToTitle(r rune) rune {
273 if r <= MaxASCII {
274 if 'a' <= r && r <= 'z' {
275 r -= 'a' - 'A'
276 }
277 return r
278 }
279 return To(TitleCase, r)
280 }
281
282
283 func (special SpecialCase) ToUpper(r rune) rune {
284 r1, hadMapping := to(UpperCase, r, []CaseRange(special))
285 if r1 == r && !hadMapping {
286 r1 = ToUpper(r)
287 }
288 return r1
289 }
290
291
292 func (special SpecialCase) ToTitle(r rune) rune {
293 r1, hadMapping := to(TitleCase, r, []CaseRange(special))
294 if r1 == r && !hadMapping {
295 r1 = ToTitle(r)
296 }
297 return r1
298 }
299
300
301 func (special SpecialCase) ToLower(r rune) rune {
302 r1, hadMapping := to(LowerCase, r, []CaseRange(special))
303 if r1 == r && !hadMapping {
304 r1 = ToLower(r)
305 }
306 return r1
307 }
308
309
310
311
312
// foldPair has the From and To fields that SimpleFold reads from each
// caseOrbit entry: From is the search key and To is the rune returned.
type foldPair struct {
	From, To uint16
}
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336 func SimpleFold(r rune) rune {
337 if r < 0 || r > MaxRune {
338 return r
339 }
340
341 if int(r) < len(asciiFold) {
342 return rune(asciiFold[r])
343 }
344
345
346 lo := 0
347 hi := len(caseOrbit)
348 for lo < hi {
349 m := lo + (hi-lo)/2
350 if rune(caseOrbit[m].From) < r {
351 lo = m + 1
352 } else {
353 hi = m
354 }
355 }
356 if lo < len(caseOrbit) && rune(caseOrbit[lo].From) == r {
357 return rune(caseOrbit[lo].To)
358 }
359
360
361
362
363 if l := ToLower(r); l != r {
364 return l
365 }
366 return ToUpper(r)
367 }
368
View as plain text