Source file src/unicode/utf8/utf8.go
1
2
3
4
5
6
7 package utf8
8
9
10
11
12
13
// Fundamental values shared by the encoding and decoding routines.
const (
	// RuneError is the rune returned in place of input that cannot be
	// decoded, and the rune encoded in place of one that cannot be encoded.
	RuneError = '\uFFFD'

	// RuneSelf is the boundary for single-byte encodings: any byte below it
	// stands for itself as a complete rune.
	RuneSelf = 0x80

	// MaxRune is the largest value accepted as a valid rune.
	MaxRune = '\U0010FFFF'

	// UTFMax is the largest number of bytes used to encode a single rune.
	UTFMax = 4
)
20
21
// Inclusive bounds of the surrogate range. Values inside it are rejected by
// ValidRune and RuneLen, and EncodeRune replaces them with RuneError.
const (
	surrogateMin = 0xD800 // first value of the range
	surrogateMax = 0xDFFF // last value of the range
)
26
// Tag bits placed in the high end of each encoded byte. tx marks a
// continuation byte; t2, t3 and t4 mark the lead byte of a 2-, 3- and 4-byte
// sequence. t1 and t5 are not referenced by the routines in this file.
const (
	t1 = 0b00000000
	tx = 0b10000000
	t2 = 0b11000000
	t3 = 0b11100000
	t4 = 0b11110000
	t5 = 0b11111000
)

// Masks selecting the payload bits of a byte: maskx for a continuation byte,
// mask2/mask3/mask4 for the lead byte of a 2-, 3- and 4-byte sequence.
const (
	maskx = 0b00111111
	mask2 = 0b00011111
	mask3 = 0b00001111
	mask4 = 0b00000111
)

// Largest rune that fits in an encoding of one, two and three bytes.
const (
	rune1Max = 1<<7 - 1
	rune2Max = 1<<11 - 1
	rune3Max = 1<<16 - 1
)

// Default inclusive bounds for a continuation byte.
const (
	locb = 0b10000000
	hicb = 0b10111111
)

// Entry codes for the first table. The names are short so that the table
// lines up. The high nibble is an index into acceptRanges (or F for the two
// one-byte cases); the low three bits give the sequence length in bytes.
const (
	xx = 0xF1 // invalid lead byte: consumed as a single byte
	as = 0xF0 // ASCII: a single byte
	s1 = 0x02 // accept range 0, 2 bytes
	s2 = 0x13 // accept range 1, 3 bytes
	s3 = 0x03 // accept range 0, 3 bytes
	s4 = 0x23 // accept range 2, 3 bytes
	s5 = 0x34 // accept range 3, 4 bytes
	s6 = 0x04 // accept range 0, 4 bytes
	s7 = 0x44 // accept range 4, 4 bytes
)
62
63
64 var first = [256]uint8{
65
66 as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as,
67 as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as,
68 as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as,
69 as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as,
70 as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as,
71 as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as,
72 as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as,
73 as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as,
74
75 xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx,
76 xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx,
77 xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx,
78 xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx,
79 xx, xx, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1,
80 s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1,
81 s2, s3, s3, s3, s3, s3, s3, s3, s3, s3, s3, s3, s3, s4, s3, s3,
82 s5, s6, s6, s6, s7, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx,
83 }
84
85
86
// acceptRange is the inclusive range of values allowed for the second byte
// of a multi-byte sequence.
type acceptRange struct {
	lo, hi uint8 // lowest and highest permitted value
}
91
92
93 var acceptRanges = [16]acceptRange{
94 0: {locb, hicb},
95 1: {0xA0, hicb},
96 2: {locb, 0x9F},
97 3: {0x90, hicb},
98 4: {locb, 0x8F},
99 }
100
101
102
103 func FullRune(p []byte) bool {
104 n := len(p)
105 if n == 0 {
106 return false
107 }
108 x := first[p[0]]
109 if n >= int(x&7) {
110 return true
111 }
112
113 accept := acceptRanges[x>>4]
114 if n > 1 && (p[1] < accept.lo || accept.hi < p[1]) {
115 return true
116 } else if n > 2 && (p[2] < locb || hicb < p[2]) {
117 return true
118 }
119 return false
120 }
121
122
123 func FullRuneInString(s string) bool {
124 n := len(s)
125 if n == 0 {
126 return false
127 }
128 x := first[s[0]]
129 if n >= int(x&7) {
130 return true
131 }
132
133 accept := acceptRanges[x>>4]
134 if n > 1 && (s[1] < accept.lo || accept.hi < s[1]) {
135 return true
136 } else if n > 2 && (s[2] < locb || hicb < s[2]) {
137 return true
138 }
139 return false
140 }
141
142
143
144
145
146
147
148
149
150 func DecodeRune(p []byte) (r rune, size int) {
151 n := len(p)
152 if n < 1 {
153 return RuneError, 0
154 }
155 p0 := p[0]
156 x := first[p0]
157 if x >= as {
158
159
160
161 mask := rune(x) << 31 >> 31
162 return rune(p[0])&^mask | RuneError&mask, 1
163 }
164 sz := int(x & 7)
165 accept := acceptRanges[x>>4]
166 if n < sz {
167 return RuneError, 1
168 }
169 b1 := p[1]
170 if b1 < accept.lo || accept.hi < b1 {
171 return RuneError, 1
172 }
173 if sz <= 2 {
174 return rune(p0&mask2)<<6 | rune(b1&maskx), 2
175 }
176 b2 := p[2]
177 if b2 < locb || hicb < b2 {
178 return RuneError, 1
179 }
180 if sz <= 3 {
181 return rune(p0&mask3)<<12 | rune(b1&maskx)<<6 | rune(b2&maskx), 3
182 }
183 b3 := p[3]
184 if b3 < locb || hicb < b3 {
185 return RuneError, 1
186 }
187 return rune(p0&mask4)<<18 | rune(b1&maskx)<<12 | rune(b2&maskx)<<6 | rune(b3&maskx), 4
188 }
189
190
191
192
193
194
195
196
197
198 func DecodeRuneInString(s string) (r rune, size int) {
199 n := len(s)
200 if n < 1 {
201 return RuneError, 0
202 }
203 s0 := s[0]
204 x := first[s0]
205 if x >= as {
206
207
208
209 mask := rune(x) << 31 >> 31
210 return rune(s[0])&^mask | RuneError&mask, 1
211 }
212 sz := int(x & 7)
213 accept := acceptRanges[x>>4]
214 if n < sz {
215 return RuneError, 1
216 }
217 s1 := s[1]
218 if s1 < accept.lo || accept.hi < s1 {
219 return RuneError, 1
220 }
221 if sz <= 2 {
222 return rune(s0&mask2)<<6 | rune(s1&maskx), 2
223 }
224 s2 := s[2]
225 if s2 < locb || hicb < s2 {
226 return RuneError, 1
227 }
228 if sz <= 3 {
229 return rune(s0&mask3)<<12 | rune(s1&maskx)<<6 | rune(s2&maskx), 3
230 }
231 s3 := s[3]
232 if s3 < locb || hicb < s3 {
233 return RuneError, 1
234 }
235 return rune(s0&mask4)<<18 | rune(s1&maskx)<<12 | rune(s2&maskx)<<6 | rune(s3&maskx), 4
236 }
237
238
239
240
241
242
243
244
245
246 func DecodeLastRune(p []byte) (r rune, size int) {
247 end := len(p)
248 if end == 0 {
249 return RuneError, 0
250 }
251 start := end - 1
252 r = rune(p[start])
253 if r < RuneSelf {
254 return r, 1
255 }
256
257
258
259 lim := end - UTFMax
260 if lim < 0 {
261 lim = 0
262 }
263 for start--; start >= lim; start-- {
264 if RuneStart(p[start]) {
265 break
266 }
267 }
268 if start < 0 {
269 start = 0
270 }
271 r, size = DecodeRune(p[start:end])
272 if start+size != end {
273 return RuneError, 1
274 }
275 return r, size
276 }
277
278
279
280
281
282
283
284
285
286 func DecodeLastRuneInString(s string) (r rune, size int) {
287 end := len(s)
288 if end == 0 {
289 return RuneError, 0
290 }
291 start := end - 1
292 r = rune(s[start])
293 if r < RuneSelf {
294 return r, 1
295 }
296
297
298
299 lim := end - UTFMax
300 if lim < 0 {
301 lim = 0
302 }
303 for start--; start >= lim; start-- {
304 if RuneStart(s[start]) {
305 break
306 }
307 }
308 if start < 0 {
309 start = 0
310 }
311 r, size = DecodeRuneInString(s[start:end])
312 if start+size != end {
313 return RuneError, 1
314 }
315 return r, size
316 }
317
318
319
320 func RuneLen(r rune) int {
321 switch {
322 case r < 0:
323 return -1
324 case r <= rune1Max:
325 return 1
326 case r <= rune2Max:
327 return 2
328 case surrogateMin <= r && r <= surrogateMax:
329 return -1
330 case r <= rune3Max:
331 return 3
332 case r <= MaxRune:
333 return 4
334 }
335 return -1
336 }
337
338
339
340 func EncodeRune(p []byte, r rune) int {
341
342 switch i := uint32(r); {
343 case i <= rune1Max:
344 p[0] = byte(r)
345 return 1
346 case i <= rune2Max:
347 _ = p[1]
348 p[0] = t2 | byte(r>>6)
349 p[1] = tx | byte(r)&maskx
350 return 2
351 case i > MaxRune, surrogateMin <= i && i <= surrogateMax:
352 r = RuneError
353 fallthrough
354 case i <= rune3Max:
355 _ = p[2]
356 p[0] = t3 | byte(r>>12)
357 p[1] = tx | byte(r>>6)&maskx
358 p[2] = tx | byte(r)&maskx
359 return 3
360 default:
361 _ = p[3]
362 p[0] = t4 | byte(r>>18)
363 p[1] = tx | byte(r>>12)&maskx
364 p[2] = tx | byte(r>>6)&maskx
365 p[3] = tx | byte(r)&maskx
366 return 4
367 }
368 }
369
370
371
372 func RuneCount(p []byte) int {
373 np := len(p)
374 var n int
375 for i := 0; i < np; {
376 n++
377 c := p[i]
378 if c < RuneSelf {
379
380 i++
381 continue
382 }
383 x := first[c]
384 if x == xx {
385 i++
386 continue
387 }
388 size := int(x & 7)
389 if i+size > np {
390 i++
391 continue
392 }
393 accept := acceptRanges[x>>4]
394 if c := p[i+1]; c < accept.lo || accept.hi < c {
395 size = 1
396 } else if size == 2 {
397 } else if c := p[i+2]; c < locb || hicb < c {
398 size = 1
399 } else if size == 3 {
400 } else if c := p[i+3]; c < locb || hicb < c {
401 size = 1
402 }
403 i += size
404 }
405 return n
406 }
407
408
409 func RuneCountInString(s string) (n int) {
410 ns := len(s)
411 for i := 0; i < ns; n++ {
412 c := s[i]
413 if c < RuneSelf {
414
415 i++
416 continue
417 }
418 x := first[c]
419 if x == xx {
420 i++
421 continue
422 }
423 size := int(x & 7)
424 if i+size > ns {
425 i++
426 continue
427 }
428 accept := acceptRanges[x>>4]
429 if c := s[i+1]; c < accept.lo || accept.hi < c {
430 size = 1
431 } else if size == 2 {
432 } else if c := s[i+2]; c < locb || hicb < c {
433 size = 1
434 } else if size == 3 {
435 } else if c := s[i+3]; c < locb || hicb < c {
436 size = 1
437 }
438 i += size
439 }
440 return n
441 }
442
443
444
445
// RuneStart reports whether b could be the first byte of an encoded rune.
// Only continuation bytes, whose top two bits are 10, cannot be.
func RuneStart(b byte) bool {
	return b>>6 != 0b10
}
447
448
449 func Valid(p []byte) bool {
450 n := len(p)
451 for i := 0; i < n; {
452 pi := p[i]
453 if pi < RuneSelf {
454 i++
455 continue
456 }
457 x := first[pi]
458 if x == xx {
459 return false
460 }
461 size := int(x & 7)
462 if i+size > n {
463 return false
464 }
465 accept := acceptRanges[x>>4]
466 if c := p[i+1]; c < accept.lo || accept.hi < c {
467 return false
468 } else if size == 2 {
469 } else if c := p[i+2]; c < locb || hicb < c {
470 return false
471 } else if size == 3 {
472 } else if c := p[i+3]; c < locb || hicb < c {
473 return false
474 }
475 i += size
476 }
477 return true
478 }
479
480
481 func ValidString(s string) bool {
482 n := len(s)
483 for i := 0; i < n; {
484 si := s[i]
485 if si < RuneSelf {
486 i++
487 continue
488 }
489 x := first[si]
490 if x == xx {
491 return false
492 }
493 size := int(x & 7)
494 if i+size > n {
495 return false
496 }
497 accept := acceptRanges[x>>4]
498 if c := s[i+1]; c < accept.lo || accept.hi < c {
499 return false
500 } else if size == 2 {
501 } else if c := s[i+2]; c < locb || hicb < c {
502 return false
503 } else if size == 3 {
504 } else if c := s[i+3]; c < locb || hicb < c {
505 return false
506 }
507 i += size
508 }
509 return true
510 }
511
512
513
514 func ValidRune(r rune) bool {
515 switch {
516 case 0 <= r && r < surrogateMin:
517 return true
518 case surrogateMax < r && r <= MaxRune:
519 return true
520 }
521 return false
522 }
523
View as plain text