Source file src/pkg/strings/strings.go
1
2
3
4
5
6
7
8 package strings
9
10 import (
11 "internal/bytealg"
12 "unicode"
13 "unicode/utf8"
14 )
15
16
17
18
19 func explode(s string, n int) []string {
20 l := utf8.RuneCountInString(s)
21 if n < 0 || n > l {
22 n = l
23 }
24 a := make([]string, n)
25 for i := 0; i < n-1; i++ {
26 ch, size := utf8.DecodeRuneInString(s)
27 a[i] = s[:size]
28 s = s[size:]
29 if ch == utf8.RuneError {
30 a[i] = string(utf8.RuneError)
31 }
32 }
33 if n > 0 {
34 a[n-1] = s
35 }
36 return a
37 }
38
39
40 const primeRK = 16777619
41
42
43
44 func hashStr(sep string) (uint32, uint32) {
45 hash := uint32(0)
46 for i := 0; i < len(sep); i++ {
47 hash = hash*primeRK + uint32(sep[i])
48 }
49 var pow, sq uint32 = 1, primeRK
50 for i := len(sep); i > 0; i >>= 1 {
51 if i&1 != 0 {
52 pow *= sq
53 }
54 sq *= sq
55 }
56 return hash, pow
57 }
58
59
60
61 func hashStrRev(sep string) (uint32, uint32) {
62 hash := uint32(0)
63 for i := len(sep) - 1; i >= 0; i-- {
64 hash = hash*primeRK + uint32(sep[i])
65 }
66 var pow, sq uint32 = 1, primeRK
67 for i := len(sep); i > 0; i >>= 1 {
68 if i&1 != 0 {
69 pow *= sq
70 }
71 sq *= sq
72 }
73 return hash, pow
74 }
75
76
77
78 func Count(s, substr string) int {
79
80 if len(substr) == 0 {
81 return utf8.RuneCountInString(s) + 1
82 }
83 if len(substr) == 1 {
84 return bytealg.CountString(s, substr[0])
85 }
86 n := 0
87 for {
88 i := Index(s, substr)
89 if i == -1 {
90 return n
91 }
92 n++
93 s = s[i+len(substr):]
94 }
95 }
96
97
98 func Contains(s, substr string) bool {
99 return Index(s, substr) >= 0
100 }
101
102
103 func ContainsAny(s, chars string) bool {
104 return IndexAny(s, chars) >= 0
105 }
106
107
108 func ContainsRune(s string, r rune) bool {
109 return IndexRune(s, r) >= 0
110 }
111
112
113 func LastIndex(s, substr string) int {
114 n := len(substr)
115 switch {
116 case n == 0:
117 return len(s)
118 case n == 1:
119 return LastIndexByte(s, substr[0])
120 case n == len(s):
121 if substr == s {
122 return 0
123 }
124 return -1
125 case n > len(s):
126 return -1
127 }
128
129 hashss, pow := hashStrRev(substr)
130 last := len(s) - n
131 var h uint32
132 for i := len(s) - 1; i >= last; i-- {
133 h = h*primeRK + uint32(s[i])
134 }
135 if h == hashss && s[last:] == substr {
136 return last
137 }
138 for i := last - 1; i >= 0; i-- {
139 h *= primeRK
140 h += uint32(s[i])
141 h -= pow * uint32(s[i+n])
142 if h == hashss && s[i:i+n] == substr {
143 return i
144 }
145 }
146 return -1
147 }
148
149
150 func IndexByte(s string, c byte) int {
151 return bytealg.IndexByteString(s, c)
152 }
153
154
155
156
157
158 func IndexRune(s string, r rune) int {
159 switch {
160 case 0 <= r && r < utf8.RuneSelf:
161 return IndexByte(s, byte(r))
162 case r == utf8.RuneError:
163 for i, r := range s {
164 if r == utf8.RuneError {
165 return i
166 }
167 }
168 return -1
169 case !utf8.ValidRune(r):
170 return -1
171 default:
172 return Index(s, string(r))
173 }
174 }
175
176
177
178 func IndexAny(s, chars string) int {
179 if chars == "" {
180
181 return -1
182 }
183 if len(s) > 8 {
184 if as, isASCII := makeASCIISet(chars); isASCII {
185 for i := 0; i < len(s); i++ {
186 if as.contains(s[i]) {
187 return i
188 }
189 }
190 return -1
191 }
192 }
193 for i, c := range s {
194 for _, m := range chars {
195 if c == m {
196 return i
197 }
198 }
199 }
200 return -1
201 }
202
203
204
205
206 func LastIndexAny(s, chars string) int {
207 if chars == "" {
208
209 return -1
210 }
211 if len(s) > 8 {
212 if as, isASCII := makeASCIISet(chars); isASCII {
213 for i := len(s) - 1; i >= 0; i-- {
214 if as.contains(s[i]) {
215 return i
216 }
217 }
218 return -1
219 }
220 }
221 for i := len(s); i > 0; {
222 r, size := utf8.DecodeLastRuneInString(s[:i])
223 i -= size
224 for _, c := range chars {
225 if r == c {
226 return i
227 }
228 }
229 }
230 return -1
231 }
232
233
234 func LastIndexByte(s string, c byte) int {
235 for i := len(s) - 1; i >= 0; i-- {
236 if s[i] == c {
237 return i
238 }
239 }
240 return -1
241 }
242
243
244
245 func genSplit(s, sep string, sepSave, n int) []string {
246 if n == 0 {
247 return nil
248 }
249 if sep == "" {
250 return explode(s, n)
251 }
252 if n < 0 {
253 n = Count(s, sep) + 1
254 }
255
256 a := make([]string, n)
257 n--
258 i := 0
259 for i < n {
260 m := Index(s, sep)
261 if m < 0 {
262 break
263 }
264 a[i] = s[:m+sepSave]
265 s = s[m+len(sep):]
266 i++
267 }
268 a[i] = s
269 return a[:i+1]
270 }
271
272
273
274
275
276
277
278
279
280
281
282 func SplitN(s, sep string, n int) []string { return genSplit(s, sep, 0, n) }
283
284
285
286
287
288
289
290
291
292
293
294 func SplitAfterN(s, sep string, n int) []string {
295 return genSplit(s, sep, len(sep), n)
296 }
297
298
299
300
301
302
303
304
305
306
307
308 func Split(s, sep string) []string { return genSplit(s, sep, 0, -1) }
309
310
311
312
313
314
315
316
317
318
319
320 func SplitAfter(s, sep string) []string {
321 return genSplit(s, sep, len(sep), -1)
322 }
323
324 var asciiSpace = [256]uint8{'\t': 1, '\n': 1, '\v': 1, '\f': 1, '\r': 1, ' ': 1}
325
326
327
328
329 func Fields(s string) []string {
330
331
332 n := 0
333 wasSpace := 1
334
335 setBits := uint8(0)
336 for i := 0; i < len(s); i++ {
337 r := s[i]
338 setBits |= r
339 isSpace := int(asciiSpace[r])
340 n += wasSpace & ^isSpace
341 wasSpace = isSpace
342 }
343
344 if setBits >= utf8.RuneSelf {
345
346 return FieldsFunc(s, unicode.IsSpace)
347 }
348
349 a := make([]string, n)
350 na := 0
351 fieldStart := 0
352 i := 0
353
354 for i < len(s) && asciiSpace[s[i]] != 0 {
355 i++
356 }
357 fieldStart = i
358 for i < len(s) {
359 if asciiSpace[s[i]] == 0 {
360 i++
361 continue
362 }
363 a[na] = s[fieldStart:i]
364 na++
365 i++
366
367 for i < len(s) && asciiSpace[s[i]] != 0 {
368 i++
369 }
370 fieldStart = i
371 }
372 if fieldStart < len(s) {
373 a[na] = s[fieldStart:]
374 }
375 return a
376 }
377
378
379
380
381
382
383 func FieldsFunc(s string, f func(rune) bool) []string {
384
385
386 type span struct {
387 start int
388 end int
389 }
390 spans := make([]span, 0, 32)
391
392
393 wasField := false
394 fromIndex := 0
395 for i, rune := range s {
396 if f(rune) {
397 if wasField {
398 spans = append(spans, span{start: fromIndex, end: i})
399 wasField = false
400 }
401 } else {
402 if !wasField {
403 fromIndex = i
404 wasField = true
405 }
406 }
407 }
408
409
410 if wasField {
411 spans = append(spans, span{fromIndex, len(s)})
412 }
413
414
415 a := make([]string, len(spans))
416 for i, span := range spans {
417 a[i] = s[span.start:span.end]
418 }
419
420 return a
421 }
422
423
424
425 func Join(a []string, sep string) string {
426 switch len(a) {
427 case 0:
428 return ""
429 case 1:
430 return a[0]
431 }
432 n := len(sep) * (len(a) - 1)
433 for i := 0; i < len(a); i++ {
434 n += len(a[i])
435 }
436
437 var b Builder
438 b.Grow(n)
439 b.WriteString(a[0])
440 for _, s := range a[1:] {
441 b.WriteString(sep)
442 b.WriteString(s)
443 }
444 return b.String()
445 }
446
447
448 func HasPrefix(s, prefix string) bool {
449 return len(s) >= len(prefix) && s[0:len(prefix)] == prefix
450 }
451
452
453 func HasSuffix(s, suffix string) bool {
454 return len(s) >= len(suffix) && s[len(s)-len(suffix):] == suffix
455 }
456
457
458
459
460 func Map(mapping func(rune) rune, s string) string {
461
462
463
464
465
466
467 var b Builder
468
469 for i, c := range s {
470 r := mapping(c)
471 if r == c && c != utf8.RuneError {
472 continue
473 }
474
475 var width int
476 if c == utf8.RuneError {
477 c, width = utf8.DecodeRuneInString(s[i:])
478 if width != 1 && r == c {
479 continue
480 }
481 } else {
482 width = utf8.RuneLen(c)
483 }
484
485 b.Grow(len(s) + utf8.UTFMax)
486 b.WriteString(s[:i])
487 if r >= 0 {
488 b.WriteRune(r)
489 }
490
491 s = s[i+width:]
492 break
493 }
494
495
496 if b.Cap() == 0 {
497 return s
498 }
499
500 for _, c := range s {
501 r := mapping(c)
502
503 if r >= 0 {
504
505
506
507 if r < utf8.RuneSelf {
508 b.WriteByte(byte(r))
509 } else {
510
511 b.WriteRune(r)
512 }
513 }
514 }
515
516 return b.String()
517 }
518
519
520
521
522
523 func Repeat(s string, count int) string {
524 if count == 0 {
525 return ""
526 }
527
528
529
530
531
532 if count < 0 {
533 panic("strings: negative Repeat count")
534 } else if len(s)*count/count != len(s) {
535 panic("strings: Repeat count causes overflow")
536 }
537
538 n := len(s) * count
539 var b Builder
540 b.Grow(n)
541 b.WriteString(s)
542 for b.Len() < n {
543 if b.Len() <= n/2 {
544 b.WriteString(b.String())
545 } else {
546 b.WriteString(b.String()[:n-b.Len()])
547 break
548 }
549 }
550 return b.String()
551 }
552
553
554 func ToUpper(s string) string {
555 isASCII, hasLower := true, false
556 for i := 0; i < len(s); i++ {
557 c := s[i]
558 if c >= utf8.RuneSelf {
559 isASCII = false
560 break
561 }
562 hasLower = hasLower || ('a' <= c && c <= 'z')
563 }
564
565 if isASCII {
566 if !hasLower {
567 return s
568 }
569 var b Builder
570 b.Grow(len(s))
571 for i := 0; i < len(s); i++ {
572 c := s[i]
573 if 'a' <= c && c <= 'z' {
574 c -= 'a' - 'A'
575 }
576 b.WriteByte(c)
577 }
578 return b.String()
579 }
580 return Map(unicode.ToUpper, s)
581 }
582
583
584 func ToLower(s string) string {
585 isASCII, hasUpper := true, false
586 for i := 0; i < len(s); i++ {
587 c := s[i]
588 if c >= utf8.RuneSelf {
589 isASCII = false
590 break
591 }
592 hasUpper = hasUpper || ('A' <= c && c <= 'Z')
593 }
594
595 if isASCII {
596 if !hasUpper {
597 return s
598 }
599 var b Builder
600 b.Grow(len(s))
601 for i := 0; i < len(s); i++ {
602 c := s[i]
603 if 'A' <= c && c <= 'Z' {
604 c += 'a' - 'A'
605 }
606 b.WriteByte(c)
607 }
608 return b.String()
609 }
610 return Map(unicode.ToLower, s)
611 }
612
613
614
615 func ToTitle(s string) string { return Map(unicode.ToTitle, s) }
616
617
618
619 func ToUpperSpecial(c unicode.SpecialCase, s string) string {
620 return Map(c.ToUpper, s)
621 }
622
623
624
625 func ToLowerSpecial(c unicode.SpecialCase, s string) string {
626 return Map(c.ToLower, s)
627 }
628
629
630
631 func ToTitleSpecial(c unicode.SpecialCase, s string) string {
632 return Map(c.ToTitle, s)
633 }
634
635
636
637 func ToValidUTF8(s, replacement string) string {
638 var b Builder
639
640 for i, c := range s {
641 if c != utf8.RuneError {
642 continue
643 }
644
645 _, wid := utf8.DecodeRuneInString(s[i:])
646 if wid == 1 {
647 b.Grow(len(s) + len(replacement))
648 b.WriteString(s[:i])
649 s = s[i:]
650 break
651 }
652 }
653
654
655 if b.Cap() == 0 {
656 return s
657 }
658
659 invalid := false
660 for i := 0; i < len(s); {
661 c := s[i]
662 if c < utf8.RuneSelf {
663 i++
664 invalid = false
665 b.WriteByte(c)
666 continue
667 }
668 _, wid := utf8.DecodeRuneInString(s[i:])
669 if wid == 1 {
670 i++
671 if !invalid {
672 invalid = true
673 b.WriteString(replacement)
674 }
675 continue
676 }
677 invalid = false
678 b.WriteString(s[i : i+wid])
679 i += wid
680 }
681
682 return b.String()
683 }
684
685
686
687 func isSeparator(r rune) bool {
688
689 if r <= 0x7F {
690 switch {
691 case '0' <= r && r <= '9':
692 return false
693 case 'a' <= r && r <= 'z':
694 return false
695 case 'A' <= r && r <= 'Z':
696 return false
697 case r == '_':
698 return false
699 }
700 return true
701 }
702
703 if unicode.IsLetter(r) || unicode.IsDigit(r) {
704 return false
705 }
706
707 return unicode.IsSpace(r)
708 }
709
710
711
712
713
714 func Title(s string) string {
715
716
717
718 prev := ' '
719 return Map(
720 func(r rune) rune {
721 if isSeparator(prev) {
722 prev = r
723 return unicode.ToTitle(r)
724 }
725 prev = r
726 return r
727 },
728 s)
729 }
730
731
732
733 func TrimLeftFunc(s string, f func(rune) bool) string {
734 i := indexFunc(s, f, false)
735 if i == -1 {
736 return ""
737 }
738 return s[i:]
739 }
740
741
742
743 func TrimRightFunc(s string, f func(rune) bool) string {
744 i := lastIndexFunc(s, f, false)
745 if i >= 0 && s[i] >= utf8.RuneSelf {
746 _, wid := utf8.DecodeRuneInString(s[i:])
747 i += wid
748 } else {
749 i++
750 }
751 return s[0:i]
752 }
753
754
755
756 func TrimFunc(s string, f func(rune) bool) string {
757 return TrimRightFunc(TrimLeftFunc(s, f), f)
758 }
759
760
761
762 func IndexFunc(s string, f func(rune) bool) int {
763 return indexFunc(s, f, true)
764 }
765
766
767
768 func LastIndexFunc(s string, f func(rune) bool) int {
769 return lastIndexFunc(s, f, true)
770 }
771
772
773
774
775 func indexFunc(s string, f func(rune) bool, truth bool) int {
776 for i, r := range s {
777 if f(r) == truth {
778 return i
779 }
780 }
781 return -1
782 }
783
784
785
786
787 func lastIndexFunc(s string, f func(rune) bool, truth bool) int {
788 for i := len(s); i > 0; {
789 r, size := utf8.DecodeLastRuneInString(s[0:i])
790 i -= size
791 if f(r) == truth {
792 return i
793 }
794 }
795 return -1
796 }
797
798
799
800
801
802
803
804 type asciiSet [8]uint32
805
806
807
808 func makeASCIISet(chars string) (as asciiSet, ok bool) {
809 for i := 0; i < len(chars); i++ {
810 c := chars[i]
811 if c >= utf8.RuneSelf {
812 return as, false
813 }
814 as[c>>5] |= 1 << uint(c&31)
815 }
816 return as, true
817 }
818
819
820 func (as *asciiSet) contains(c byte) bool {
821 return (as[c>>5] & (1 << uint(c&31))) != 0
822 }
823
824 func makeCutsetFunc(cutset string) func(rune) bool {
825 if len(cutset) == 1 && cutset[0] < utf8.RuneSelf {
826 return func(r rune) bool {
827 return r == rune(cutset[0])
828 }
829 }
830 if as, isASCII := makeASCIISet(cutset); isASCII {
831 return func(r rune) bool {
832 return r < utf8.RuneSelf && as.contains(byte(r))
833 }
834 }
835 return func(r rune) bool { return IndexRune(cutset, r) >= 0 }
836 }
837
838
839
840 func Trim(s string, cutset string) string {
841 if s == "" || cutset == "" {
842 return s
843 }
844 return TrimFunc(s, makeCutsetFunc(cutset))
845 }
846
847
848
849
850
851 func TrimLeft(s string, cutset string) string {
852 if s == "" || cutset == "" {
853 return s
854 }
855 return TrimLeftFunc(s, makeCutsetFunc(cutset))
856 }
857
858
859
860
861
862 func TrimRight(s string, cutset string) string {
863 if s == "" || cutset == "" {
864 return s
865 }
866 return TrimRightFunc(s, makeCutsetFunc(cutset))
867 }
868
869
870
871 func TrimSpace(s string) string {
872
873 start := 0
874 for ; start < len(s); start++ {
875 c := s[start]
876 if c >= utf8.RuneSelf {
877
878
879 return TrimFunc(s[start:], unicode.IsSpace)
880 }
881 if asciiSpace[c] == 0 {
882 break
883 }
884 }
885
886
887 stop := len(s)
888 for ; stop > start; stop-- {
889 c := s[stop-1]
890 if c >= utf8.RuneSelf {
891 return TrimFunc(s[start:stop], unicode.IsSpace)
892 }
893 if asciiSpace[c] == 0 {
894 break
895 }
896 }
897
898
899
900
901 return s[start:stop]
902 }
903
904
905
906 func TrimPrefix(s, prefix string) string {
907 if HasPrefix(s, prefix) {
908 return s[len(prefix):]
909 }
910 return s
911 }
912
913
914
915 func TrimSuffix(s, suffix string) string {
916 if HasSuffix(s, suffix) {
917 return s[:len(s)-len(suffix)]
918 }
919 return s
920 }
921
922
923
924
925
926
927
928 func Replace(s, old, new string, n int) string {
929 if old == new || n == 0 {
930 return s
931 }
932
933
934 if m := Count(s, old); m == 0 {
935 return s
936 } else if n < 0 || m < n {
937 n = m
938 }
939
940
941 t := make([]byte, len(s)+n*(len(new)-len(old)))
942 w := 0
943 start := 0
944 for i := 0; i < n; i++ {
945 j := start
946 if len(old) == 0 {
947 if i > 0 {
948 _, wid := utf8.DecodeRuneInString(s[start:])
949 j += wid
950 }
951 } else {
952 j += Index(s[start:], old)
953 }
954 w += copy(t[w:], s[start:j])
955 w += copy(t[w:], new)
956 start = j + len(old)
957 }
958 w += copy(t[w:], s[start:])
959 return string(t[0:w])
960 }
961
962
963
964
965
966
967 func ReplaceAll(s, old, new string) string {
968 return Replace(s, old, new, -1)
969 }
970
971
972
973 func EqualFold(s, t string) bool {
974 for s != "" && t != "" {
975
976 var sr, tr rune
977 if s[0] < utf8.RuneSelf {
978 sr, s = rune(s[0]), s[1:]
979 } else {
980 r, size := utf8.DecodeRuneInString(s)
981 sr, s = r, s[size:]
982 }
983 if t[0] < utf8.RuneSelf {
984 tr, t = rune(t[0]), t[1:]
985 } else {
986 r, size := utf8.DecodeRuneInString(t)
987 tr, t = r, t[size:]
988 }
989
990
991
992
993 if tr == sr {
994 continue
995 }
996
997
998 if tr < sr {
999 tr, sr = sr, tr
1000 }
1001
1002 if tr < utf8.RuneSelf {
1003
1004 if 'A' <= sr && sr <= 'Z' && tr == sr+'a'-'A' {
1005 continue
1006 }
1007 return false
1008 }
1009
1010
1011
1012 r := unicode.SimpleFold(sr)
1013 for r != sr && r < tr {
1014 r = unicode.SimpleFold(r)
1015 }
1016 if r == tr {
1017 continue
1018 }
1019 return false
1020 }
1021
1022
1023 return s == t
1024 }
1025
1026
1027 func Index(s, substr string) int {
1028 n := len(substr)
1029 switch {
1030 case n == 0:
1031 return 0
1032 case n == 1:
1033 return IndexByte(s, substr[0])
1034 case n == len(s):
1035 if substr == s {
1036 return 0
1037 }
1038 return -1
1039 case n > len(s):
1040 return -1
1041 case n <= bytealg.MaxLen:
1042
1043 if len(s) <= bytealg.MaxBruteForce {
1044 return bytealg.IndexString(s, substr)
1045 }
1046 c0 := substr[0]
1047 c1 := substr[1]
1048 i := 0
1049 t := len(s) - n + 1
1050 fails := 0
1051 for i < t {
1052 if s[i] != c0 {
1053
1054
1055 o := IndexByte(s[i:t], c0)
1056 if o < 0 {
1057 return -1
1058 }
1059 i += o
1060 }
1061 if s[i+1] == c1 && s[i:i+n] == substr {
1062 return i
1063 }
1064 fails++
1065 i++
1066
1067 if fails > bytealg.Cutover(i) {
1068 r := bytealg.IndexString(s[i:], substr)
1069 if r >= 0 {
1070 return r + i
1071 }
1072 return -1
1073 }
1074 }
1075 return -1
1076 }
1077 c0 := substr[0]
1078 c1 := substr[1]
1079 i := 0
1080 t := len(s) - n + 1
1081 fails := 0
1082 for i < t {
1083 if s[i] != c0 {
1084 o := IndexByte(s[i:t], c0)
1085 if o < 0 {
1086 return -1
1087 }
1088 i += o
1089 }
1090 if s[i+1] == c1 && s[i:i+n] == substr {
1091 return i
1092 }
1093 i++
1094 fails++
1095 if fails >= 4+i>>4 && i < t {
1096
1097 j := indexRabinKarp(s[i:], substr)
1098 if j < 0 {
1099 return -1
1100 }
1101 return i + j
1102 }
1103 }
1104 return -1
1105 }
1106
1107 func indexRabinKarp(s, substr string) int {
1108
1109 hashss, pow := hashStr(substr)
1110 n := len(substr)
1111 var h uint32
1112 for i := 0; i < n; i++ {
1113 h = h*primeRK + uint32(s[i])
1114 }
1115 if h == hashss && s[:n] == substr {
1116 return 0
1117 }
1118 for i := n; i < len(s); {
1119 h *= primeRK
1120 h += uint32(s[i])
1121 h -= pow * uint32(s[i-n])
1122 i++
1123 if h == hashss && s[i-n:i] == substr {
1124 return i - n
1125 }
1126 }
1127 return -1
1128 }
1129
View as plain text