Source file src/strings/strings.go

     1	// Copyright 2009 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	// Package strings implements simple functions to manipulate UTF-8 encoded strings.
     6	//
     7	// For information about UTF-8 strings in Go, see https://blog.golang.org/strings.
     8	package strings
     9	
    10	import (
    11		"internal/bytealg"
    12		"unicode"
    13		"unicode/utf8"
    14	)
    15	
    16	// explode splits s into a slice of UTF-8 strings,
    17	// one string per Unicode character up to a maximum of n (n < 0 means no limit).
    18	// Invalid UTF-8 sequences become correct encodings of U+FFFD.
    19	func explode(s string, n int) []string {
    20		l := utf8.RuneCountInString(s)
    21		if n < 0 || n > l {
    22			n = l
    23		}
    24		a := make([]string, n)
    25		for i := 0; i < n-1; i++ {
    26			ch, size := utf8.DecodeRuneInString(s)
    27			a[i] = s[:size]
    28			s = s[size:]
    29			if ch == utf8.RuneError {
    30				a[i] = string(utf8.RuneError)
    31			}
    32		}
    33		if n > 0 {
    34			a[n-1] = s
    35		}
    36		return a
    37	}
    38	
    39	// primeRK is the prime base used in Rabin-Karp algorithm.
    40	const primeRK = 16777619
    41	
    42	// hashStr returns the hash and the appropriate multiplicative
    43	// factor for use in Rabin-Karp algorithm.
    44	func hashStr(sep string) (uint32, uint32) {
    45		hash := uint32(0)
    46		for i := 0; i < len(sep); i++ {
    47			hash = hash*primeRK + uint32(sep[i])
    48		}
    49		var pow, sq uint32 = 1, primeRK
    50		for i := len(sep); i > 0; i >>= 1 {
    51			if i&1 != 0 {
    52				pow *= sq
    53			}
    54			sq *= sq
    55		}
    56		return hash, pow
    57	}
    58	
    59	// hashStrRev returns the hash of the reverse of sep and the
    60	// appropriate multiplicative factor for use in Rabin-Karp algorithm.
    61	func hashStrRev(sep string) (uint32, uint32) {
    62		hash := uint32(0)
    63		for i := len(sep) - 1; i >= 0; i-- {
    64			hash = hash*primeRK + uint32(sep[i])
    65		}
    66		var pow, sq uint32 = 1, primeRK
    67		for i := len(sep); i > 0; i >>= 1 {
    68			if i&1 != 0 {
    69				pow *= sq
    70			}
    71			sq *= sq
    72		}
    73		return hash, pow
    74	}
    75	
    76	// Count counts the number of non-overlapping instances of substr in s.
    77	// If substr is an empty string, Count returns 1 + the number of Unicode code points in s.
    78	func Count(s, substr string) int {
    79		// special case
    80		if len(substr) == 0 {
    81			return utf8.RuneCountInString(s) + 1
    82		}
    83		if len(substr) == 1 {
    84			return bytealg.CountString(s, substr[0])
    85		}
    86		n := 0
    87		for {
    88			i := Index(s, substr)
    89			if i == -1 {
    90				return n
    91			}
    92			n++
    93			s = s[i+len(substr):]
    94		}
    95	}
    96	
    97	// Contains reports whether substr is within s.
    98	func Contains(s, substr string) bool {
    99		return Index(s, substr) >= 0
   100	}
   101	
   102	// ContainsAny reports whether any Unicode code points in chars are within s.
   103	func ContainsAny(s, chars string) bool {
   104		return IndexAny(s, chars) >= 0
   105	}
   106	
   107	// ContainsRune reports whether the Unicode code point r is within s.
   108	func ContainsRune(s string, r rune) bool {
   109		return IndexRune(s, r) >= 0
   110	}
   111	
   112	// LastIndex returns the index of the last instance of substr in s, or -1 if substr is not present in s.
   113	func LastIndex(s, substr string) int {
   114		n := len(substr)
   115		switch {
   116		case n == 0:
   117			return len(s)
   118		case n == 1:
   119			return LastIndexByte(s, substr[0])
   120		case n == len(s):
   121			if substr == s {
   122				return 0
   123			}
   124			return -1
   125		case n > len(s):
   126			return -1
   127		}
   128		// Rabin-Karp search from the end of the string
   129		hashss, pow := hashStrRev(substr)
   130		last := len(s) - n
   131		var h uint32
   132		for i := len(s) - 1; i >= last; i-- {
   133			h = h*primeRK + uint32(s[i])
   134		}
   135		if h == hashss && s[last:] == substr {
   136			return last
   137		}
   138		for i := last - 1; i >= 0; i-- {
   139			h *= primeRK
   140			h += uint32(s[i])
   141			h -= pow * uint32(s[i+n])
   142			if h == hashss && s[i:i+n] == substr {
   143				return i
   144			}
   145		}
   146		return -1
   147	}
   148	
   149	// IndexByte returns the index of the first instance of c in s, or -1 if c is not present in s.
   150	func IndexByte(s string, c byte) int {
   151		return bytealg.IndexByteString(s, c)
   152	}
   153	
   154	// IndexRune returns the index of the first instance of the Unicode code point
   155	// r, or -1 if rune is not present in s.
   156	// If r is utf8.RuneError, it returns the first instance of any
   157	// invalid UTF-8 byte sequence.
   158	func IndexRune(s string, r rune) int {
   159		switch {
   160		case 0 <= r && r < utf8.RuneSelf:
   161			return IndexByte(s, byte(r))
   162		case r == utf8.RuneError:
   163			for i, r := range s {
   164				if r == utf8.RuneError {
   165					return i
   166				}
   167			}
   168			return -1
   169		case !utf8.ValidRune(r):
   170			return -1
   171		default:
   172			return Index(s, string(r))
   173		}
   174	}
   175	
   176	// IndexAny returns the index of the first instance of any Unicode code point
   177	// from chars in s, or -1 if no Unicode code point from chars is present in s.
   178	func IndexAny(s, chars string) int {
   179		if chars == "" {
   180			// Avoid scanning all of s.
   181			return -1
   182		}
   183		if len(s) > 8 {
   184			if as, isASCII := makeASCIISet(chars); isASCII {
   185				for i := 0; i < len(s); i++ {
   186					if as.contains(s[i]) {
   187						return i
   188					}
   189				}
   190				return -1
   191			}
   192		}
   193		for i, c := range s {
   194			for _, m := range chars {
   195				if c == m {
   196					return i
   197				}
   198			}
   199		}
   200		return -1
   201	}
   202	
   203	// LastIndexAny returns the index of the last instance of any Unicode code
   204	// point from chars in s, or -1 if no Unicode code point from chars is
   205	// present in s.
   206	func LastIndexAny(s, chars string) int {
   207		if chars == "" {
   208			// Avoid scanning all of s.
   209			return -1
   210		}
   211		if len(s) > 8 {
   212			if as, isASCII := makeASCIISet(chars); isASCII {
   213				for i := len(s) - 1; i >= 0; i-- {
   214					if as.contains(s[i]) {
   215						return i
   216					}
   217				}
   218				return -1
   219			}
   220		}
   221		for i := len(s); i > 0; {
   222			r, size := utf8.DecodeLastRuneInString(s[:i])
   223			i -= size
   224			for _, c := range chars {
   225				if r == c {
   226					return i
   227				}
   228			}
   229		}
   230		return -1
   231	}
   232	
   233	// LastIndexByte returns the index of the last instance of c in s, or -1 if c is not present in s.
   234	func LastIndexByte(s string, c byte) int {
   235		for i := len(s) - 1; i >= 0; i-- {
   236			if s[i] == c {
   237				return i
   238			}
   239		}
   240		return -1
   241	}
   242	
   243	// Generic split: splits after each instance of sep,
   244	// including sepSave bytes of sep in the subarrays.
   245	func genSplit(s, sep string, sepSave, n int) []string {
   246		if n == 0 {
   247			return nil
   248		}
   249		if sep == "" {
   250			return explode(s, n)
   251		}
   252		if n < 0 {
   253			n = Count(s, sep) + 1
   254		}
   255	
   256		a := make([]string, n)
   257		n--
   258		i := 0
   259		for i < n {
   260			m := Index(s, sep)
   261			if m < 0 {
   262				break
   263			}
   264			a[i] = s[:m+sepSave]
   265			s = s[m+len(sep):]
   266			i++
   267		}
   268		a[i] = s
   269		return a[:i+1]
   270	}
   271	
   272	// SplitN slices s into substrings separated by sep and returns a slice of
   273	// the substrings between those separators.
   274	//
   275	// The count determines the number of substrings to return:
   276	//   n > 0: at most n substrings; the last substring will be the unsplit remainder.
   277	//   n == 0: the result is nil (zero substrings)
   278	//   n < 0: all substrings
   279	//
   280	// Edge cases for s and sep (for example, empty strings) are handled
   281	// as described in the documentation for Split.
   282	func SplitN(s, sep string, n int) []string { return genSplit(s, sep, 0, n) }
   283	
   284	// SplitAfterN slices s into substrings after each instance of sep and
   285	// returns a slice of those substrings.
   286	//
   287	// The count determines the number of substrings to return:
   288	//   n > 0: at most n substrings; the last substring will be the unsplit remainder.
   289	//   n == 0: the result is nil (zero substrings)
   290	//   n < 0: all substrings
   291	//
   292	// Edge cases for s and sep (for example, empty strings) are handled
   293	// as described in the documentation for SplitAfter.
   294	func SplitAfterN(s, sep string, n int) []string {
   295		return genSplit(s, sep, len(sep), n)
   296	}
   297	
   298	// Split slices s into all substrings separated by sep and returns a slice of
   299	// the substrings between those separators.
   300	//
   301	// If s does not contain sep and sep is not empty, Split returns a
   302	// slice of length 1 whose only element is s.
   303	//
   304	// If sep is empty, Split splits after each UTF-8 sequence. If both s
   305	// and sep are empty, Split returns an empty slice.
   306	//
   307	// It is equivalent to SplitN with a count of -1.
   308	func Split(s, sep string) []string { return genSplit(s, sep, 0, -1) }
   309	
   310	// SplitAfter slices s into all substrings after each instance of sep and
   311	// returns a slice of those substrings.
   312	//
   313	// If s does not contain sep and sep is not empty, SplitAfter returns
   314	// a slice of length 1 whose only element is s.
   315	//
   316	// If sep is empty, SplitAfter splits after each UTF-8 sequence. If
   317	// both s and sep are empty, SplitAfter returns an empty slice.
   318	//
   319	// It is equivalent to SplitAfterN with a count of -1.
   320	func SplitAfter(s, sep string) []string {
   321		return genSplit(s, sep, len(sep), -1)
   322	}
   323	
   324	var asciiSpace = [256]uint8{'\t': 1, '\n': 1, '\v': 1, '\f': 1, '\r': 1, ' ': 1}
   325	
   326	// Fields splits the string s around each instance of one or more consecutive white space
   327	// characters, as defined by unicode.IsSpace, returning a slice of substrings of s or an
   328	// empty slice if s contains only white space.
   329	func Fields(s string) []string {
   330		// First count the fields.
   331		// This is an exact count if s is ASCII, otherwise it is an approximation.
   332		n := 0
   333		wasSpace := 1
   334		// setBits is used to track which bits are set in the bytes of s.
   335		setBits := uint8(0)
   336		for i := 0; i < len(s); i++ {
   337			r := s[i]
   338			setBits |= r
   339			isSpace := int(asciiSpace[r])
   340			n += wasSpace & ^isSpace
   341			wasSpace = isSpace
   342		}
   343	
   344		if setBits >= utf8.RuneSelf {
   345			// Some runes in the input string are not ASCII.
   346			return FieldsFunc(s, unicode.IsSpace)
   347		}
   348		// ASCII fast path
   349		a := make([]string, n)
   350		na := 0
   351		fieldStart := 0
   352		i := 0
   353		// Skip spaces in the front of the input.
   354		for i < len(s) && asciiSpace[s[i]] != 0 {
   355			i++
   356		}
   357		fieldStart = i
   358		for i < len(s) {
   359			if asciiSpace[s[i]] == 0 {
   360				i++
   361				continue
   362			}
   363			a[na] = s[fieldStart:i]
   364			na++
   365			i++
   366			// Skip spaces in between fields.
   367			for i < len(s) && asciiSpace[s[i]] != 0 {
   368				i++
   369			}
   370			fieldStart = i
   371		}
   372		if fieldStart < len(s) { // Last field might end at EOF.
   373			a[na] = s[fieldStart:]
   374		}
   375		return a
   376	}
   377	
   378	// FieldsFunc splits the string s at each run of Unicode code points c satisfying f(c)
   379	// and returns an array of slices of s. If all code points in s satisfy f(c) or the
   380	// string is empty, an empty slice is returned.
   381	// FieldsFunc makes no guarantees about the order in which it calls f(c).
   382	// If f does not return consistent results for a given c, FieldsFunc may crash.
   383	func FieldsFunc(s string, f func(rune) bool) []string {
   384		// A span is used to record a slice of s of the form s[start:end].
   385		// The start index is inclusive and the end index is exclusive.
   386		type span struct {
   387			start int
   388			end   int
   389		}
   390		spans := make([]span, 0, 32)
   391	
   392		// Find the field start and end indices.
   393		wasField := false
   394		fromIndex := 0
   395		for i, rune := range s {
   396			if f(rune) {
   397				if wasField {
   398					spans = append(spans, span{start: fromIndex, end: i})
   399					wasField = false
   400				}
   401			} else {
   402				if !wasField {
   403					fromIndex = i
   404					wasField = true
   405				}
   406			}
   407		}
   408	
   409		// Last field might end at EOF.
   410		if wasField {
   411			spans = append(spans, span{fromIndex, len(s)})
   412		}
   413	
   414		// Create strings from recorded field indices.
   415		a := make([]string, len(spans))
   416		for i, span := range spans {
   417			a[i] = s[span.start:span.end]
   418		}
   419	
   420		return a
   421	}
   422	
   423	// Join concatenates the elements of a to create a single string. The separator string
   424	// sep is placed between elements in the resulting string.
   425	func Join(a []string, sep string) string {
   426		switch len(a) {
   427		case 0:
   428			return ""
   429		case 1:
   430			return a[0]
   431		}
   432		n := len(sep) * (len(a) - 1)
   433		for i := 0; i < len(a); i++ {
   434			n += len(a[i])
   435		}
   436	
   437		var b Builder
   438		b.Grow(n)
   439		b.WriteString(a[0])
   440		for _, s := range a[1:] {
   441			b.WriteString(sep)
   442			b.WriteString(s)
   443		}
   444		return b.String()
   445	}
   446	
   447	// HasPrefix tests whether the string s begins with prefix.
   448	func HasPrefix(s, prefix string) bool {
   449		return len(s) >= len(prefix) && s[0:len(prefix)] == prefix
   450	}
   451	
   452	// HasSuffix tests whether the string s ends with suffix.
   453	func HasSuffix(s, suffix string) bool {
   454		return len(s) >= len(suffix) && s[len(s)-len(suffix):] == suffix
   455	}
   456	
   457	// Map returns a copy of the string s with all its characters modified
   458	// according to the mapping function. If mapping returns a negative value, the character is
   459	// dropped from the string with no replacement.
   460	func Map(mapping func(rune) rune, s string) string {
   461		// In the worst case, the string can grow when mapped, making
   462		// things unpleasant. But it's so rare we barge in assuming it's
   463		// fine. It could also shrink but that falls out naturally.
   464	
   465		// The output buffer b is initialized on demand, the first
   466		// time a character differs.
   467		var b Builder
   468	
   469		for i, c := range s {
   470			r := mapping(c)
   471			if r == c && c != utf8.RuneError {
   472				continue
   473			}
   474	
   475			var width int
   476			if c == utf8.RuneError {
   477				c, width = utf8.DecodeRuneInString(s[i:])
   478				if width != 1 && r == c {
   479					continue
   480				}
   481			} else {
   482				width = utf8.RuneLen(c)
   483			}
   484	
   485			b.Grow(len(s) + utf8.UTFMax)
   486			b.WriteString(s[:i])
   487			if r >= 0 {
   488				b.WriteRune(r)
   489			}
   490	
   491			s = s[i+width:]
   492			break
   493		}
   494	
   495		// Fast path for unchanged input
   496		if b.Cap() == 0 { // didn't call b.Grow above
   497			return s
   498		}
   499	
   500		for _, c := range s {
   501			r := mapping(c)
   502	
   503			if r >= 0 {
   504				// common case
   505				// Due to inlining, it is more performant to determine if WriteByte should be
   506				// invoked rather than always call WriteRune
   507				if r < utf8.RuneSelf {
   508					b.WriteByte(byte(r))
   509				} else {
   510					// r is not a ASCII rune.
   511					b.WriteRune(r)
   512				}
   513			}
   514		}
   515	
   516		return b.String()
   517	}
   518	
   519	// Repeat returns a new string consisting of count copies of the string s.
   520	//
   521	// It panics if count is negative or if
   522	// the result of (len(s) * count) overflows.
   523	func Repeat(s string, count int) string {
   524		if count == 0 {
   525			return ""
   526		}
   527	
   528		// Since we cannot return an error on overflow,
   529		// we should panic if the repeat will generate
   530		// an overflow.
   531		// See Issue golang.org/issue/16237
   532		if count < 0 {
   533			panic("strings: negative Repeat count")
   534		} else if len(s)*count/count != len(s) {
   535			panic("strings: Repeat count causes overflow")
   536		}
   537	
   538		n := len(s) * count
   539		var b Builder
   540		b.Grow(n)
   541		b.WriteString(s)
   542		for b.Len() < n {
   543			if b.Len() <= n/2 {
   544				b.WriteString(b.String())
   545			} else {
   546				b.WriteString(b.String()[:n-b.Len()])
   547				break
   548			}
   549		}
   550		return b.String()
   551	}
   552	
   553	// ToUpper returns s with all Unicode letters mapped to their upper case.
   554	func ToUpper(s string) string {
   555		isASCII, hasLower := true, false
   556		for i := 0; i < len(s); i++ {
   557			c := s[i]
   558			if c >= utf8.RuneSelf {
   559				isASCII = false
   560				break
   561			}
   562			hasLower = hasLower || ('a' <= c && c <= 'z')
   563		}
   564	
   565		if isASCII { // optimize for ASCII-only strings.
   566			if !hasLower {
   567				return s
   568			}
   569			var b Builder
   570			b.Grow(len(s))
   571			for i := 0; i < len(s); i++ {
   572				c := s[i]
   573				if 'a' <= c && c <= 'z' {
   574					c -= 'a' - 'A'
   575				}
   576				b.WriteByte(c)
   577			}
   578			return b.String()
   579		}
   580		return Map(unicode.ToUpper, s)
   581	}
   582	
   583	// ToLower returns s with all Unicode letters mapped to their lower case.
   584	func ToLower(s string) string {
   585		isASCII, hasUpper := true, false
   586		for i := 0; i < len(s); i++ {
   587			c := s[i]
   588			if c >= utf8.RuneSelf {
   589				isASCII = false
   590				break
   591			}
   592			hasUpper = hasUpper || ('A' <= c && c <= 'Z')
   593		}
   594	
   595		if isASCII { // optimize for ASCII-only strings.
   596			if !hasUpper {
   597				return s
   598			}
   599			var b Builder
   600			b.Grow(len(s))
   601			for i := 0; i < len(s); i++ {
   602				c := s[i]
   603				if 'A' <= c && c <= 'Z' {
   604					c += 'a' - 'A'
   605				}
   606				b.WriteByte(c)
   607			}
   608			return b.String()
   609		}
   610		return Map(unicode.ToLower, s)
   611	}
   612	
   613	// ToTitle returns a copy of the string s with all Unicode letters mapped to
   614	// their Unicode title case.
   615	func ToTitle(s string) string { return Map(unicode.ToTitle, s) }
   616	
   617	// ToUpperSpecial returns a copy of the string s with all Unicode letters mapped to their
   618	// upper case using the case mapping specified by c.
   619	func ToUpperSpecial(c unicode.SpecialCase, s string) string {
   620		return Map(c.ToUpper, s)
   621	}
   622	
   623	// ToLowerSpecial returns a copy of the string s with all Unicode letters mapped to their
   624	// lower case using the case mapping specified by c.
   625	func ToLowerSpecial(c unicode.SpecialCase, s string) string {
   626		return Map(c.ToLower, s)
   627	}
   628	
   629	// ToTitleSpecial returns a copy of the string s with all Unicode letters mapped to their
   630	// Unicode title case, giving priority to the special casing rules.
   631	func ToTitleSpecial(c unicode.SpecialCase, s string) string {
   632		return Map(c.ToTitle, s)
   633	}
   634	
   635	// ToValidUTF8 returns a copy of the string s with each run of invalid UTF-8 byte sequences
   636	// replaced by the replacement string, which may be empty.
   637	func ToValidUTF8(s, replacement string) string {
   638		var b Builder
   639	
   640		for i, c := range s {
   641			if c != utf8.RuneError {
   642				continue
   643			}
   644	
   645			_, wid := utf8.DecodeRuneInString(s[i:])
   646			if wid == 1 {
   647				b.Grow(len(s) + len(replacement))
   648				b.WriteString(s[:i])
   649				s = s[i:]
   650				break
   651			}
   652		}
   653	
   654		// Fast path for unchanged input
   655		if b.Cap() == 0 { // didn't call b.Grow above
   656			return s
   657		}
   658	
   659		invalid := false // previous byte was from an invalid UTF-8 sequence
   660		for i := 0; i < len(s); {
   661			c := s[i]
   662			if c < utf8.RuneSelf {
   663				i++
   664				invalid = false
   665				b.WriteByte(c)
   666				continue
   667			}
   668			_, wid := utf8.DecodeRuneInString(s[i:])
   669			if wid == 1 {
   670				i++
   671				if !invalid {
   672					invalid = true
   673					b.WriteString(replacement)
   674				}
   675				continue
   676			}
   677			invalid = false
   678			b.WriteString(s[i : i+wid])
   679			i += wid
   680		}
   681	
   682		return b.String()
   683	}
   684	
   685	// isSeparator reports whether the rune could mark a word boundary.
   686	// TODO: update when package unicode captures more of the properties.
   687	func isSeparator(r rune) bool {
   688		// ASCII alphanumerics and underscore are not separators
   689		if r <= 0x7F {
   690			switch {
   691			case '0' <= r && r <= '9':
   692				return false
   693			case 'a' <= r && r <= 'z':
   694				return false
   695			case 'A' <= r && r <= 'Z':
   696				return false
   697			case r == '_':
   698				return false
   699			}
   700			return true
   701		}
   702		// Letters and digits are not separators
   703		if unicode.IsLetter(r) || unicode.IsDigit(r) {
   704			return false
   705		}
   706		// Otherwise, all we can do for now is treat spaces as separators.
   707		return unicode.IsSpace(r)
   708	}
   709	
   710	// Title returns a copy of the string s with all Unicode letters that begin words
   711	// mapped to their Unicode title case.
   712	//
   713	// BUG(rsc): The rule Title uses for word boundaries does not handle Unicode punctuation properly.
   714	func Title(s string) string {
   715		// Use a closure here to remember state.
   716		// Hackish but effective. Depends on Map scanning in order and calling
   717		// the closure once per rune.
   718		prev := ' '
   719		return Map(
   720			func(r rune) rune {
   721				if isSeparator(prev) {
   722					prev = r
   723					return unicode.ToTitle(r)
   724				}
   725				prev = r
   726				return r
   727			},
   728			s)
   729	}
   730	
   731	// TrimLeftFunc returns a slice of the string s with all leading
   732	// Unicode code points c satisfying f(c) removed.
   733	func TrimLeftFunc(s string, f func(rune) bool) string {
   734		i := indexFunc(s, f, false)
   735		if i == -1 {
   736			return ""
   737		}
   738		return s[i:]
   739	}
   740	
   741	// TrimRightFunc returns a slice of the string s with all trailing
   742	// Unicode code points c satisfying f(c) removed.
   743	func TrimRightFunc(s string, f func(rune) bool) string {
   744		i := lastIndexFunc(s, f, false)
   745		if i >= 0 && s[i] >= utf8.RuneSelf {
   746			_, wid := utf8.DecodeRuneInString(s[i:])
   747			i += wid
   748		} else {
   749			i++
   750		}
   751		return s[0:i]
   752	}
   753	
   754	// TrimFunc returns a slice of the string s with all leading
   755	// and trailing Unicode code points c satisfying f(c) removed.
   756	func TrimFunc(s string, f func(rune) bool) string {
   757		return TrimRightFunc(TrimLeftFunc(s, f), f)
   758	}
   759	
   760	// IndexFunc returns the index into s of the first Unicode
   761	// code point satisfying f(c), or -1 if none do.
   762	func IndexFunc(s string, f func(rune) bool) int {
   763		return indexFunc(s, f, true)
   764	}
   765	
   766	// LastIndexFunc returns the index into s of the last
   767	// Unicode code point satisfying f(c), or -1 if none do.
   768	func LastIndexFunc(s string, f func(rune) bool) int {
   769		return lastIndexFunc(s, f, true)
   770	}
   771	
   772	// indexFunc is the same as IndexFunc except that if
   773	// truth==false, the sense of the predicate function is
   774	// inverted.
   775	func indexFunc(s string, f func(rune) bool, truth bool) int {
   776		for i, r := range s {
   777			if f(r) == truth {
   778				return i
   779			}
   780		}
   781		return -1
   782	}
   783	
   784	// lastIndexFunc is the same as LastIndexFunc except that if
   785	// truth==false, the sense of the predicate function is
   786	// inverted.
   787	func lastIndexFunc(s string, f func(rune) bool, truth bool) int {
   788		for i := len(s); i > 0; {
   789			r, size := utf8.DecodeLastRuneInString(s[0:i])
   790			i -= size
   791			if f(r) == truth {
   792				return i
   793			}
   794		}
   795		return -1
   796	}
   797	
   798	// asciiSet is a 32-byte value, where each bit represents the presence of a
   799	// given ASCII character in the set. The 128-bits of the lower 16 bytes,
   800	// starting with the least-significant bit of the lowest word to the
   801	// most-significant bit of the highest word, map to the full range of all
   802	// 128 ASCII characters. The 128-bits of the upper 16 bytes will be zeroed,
   803	// ensuring that any non-ASCII character will be reported as not in the set.
   804	type asciiSet [8]uint32
   805	
   806	// makeASCIISet creates a set of ASCII characters and reports whether all
   807	// characters in chars are ASCII.
   808	func makeASCIISet(chars string) (as asciiSet, ok bool) {
   809		for i := 0; i < len(chars); i++ {
   810			c := chars[i]
   811			if c >= utf8.RuneSelf {
   812				return as, false
   813			}
   814			as[c>>5] |= 1 << uint(c&31)
   815		}
   816		return as, true
   817	}
   818	
   819	// contains reports whether c is inside the set.
   820	func (as *asciiSet) contains(c byte) bool {
   821		return (as[c>>5] & (1 << uint(c&31))) != 0
   822	}
   823	
   824	func makeCutsetFunc(cutset string) func(rune) bool {
   825		if len(cutset) == 1 && cutset[0] < utf8.RuneSelf {
   826			return func(r rune) bool {
   827				return r == rune(cutset[0])
   828			}
   829		}
   830		if as, isASCII := makeASCIISet(cutset); isASCII {
   831			return func(r rune) bool {
   832				return r < utf8.RuneSelf && as.contains(byte(r))
   833			}
   834		}
   835		return func(r rune) bool { return IndexRune(cutset, r) >= 0 }
   836	}
   837	
   838	// Trim returns a slice of the string s with all leading and
   839	// trailing Unicode code points contained in cutset removed.
   840	func Trim(s string, cutset string) string {
   841		if s == "" || cutset == "" {
   842			return s
   843		}
   844		return TrimFunc(s, makeCutsetFunc(cutset))
   845	}
   846	
   847	// TrimLeft returns a slice of the string s with all leading
   848	// Unicode code points contained in cutset removed.
   849	//
   850	// To remove a prefix, use TrimPrefix instead.
   851	func TrimLeft(s string, cutset string) string {
   852		if s == "" || cutset == "" {
   853			return s
   854		}
   855		return TrimLeftFunc(s, makeCutsetFunc(cutset))
   856	}
   857	
   858	// TrimRight returns a slice of the string s, with all trailing
   859	// Unicode code points contained in cutset removed.
   860	//
   861	// To remove a suffix, use TrimSuffix instead.
   862	func TrimRight(s string, cutset string) string {
   863		if s == "" || cutset == "" {
   864			return s
   865		}
   866		return TrimRightFunc(s, makeCutsetFunc(cutset))
   867	}
   868	
   869	// TrimSpace returns a slice of the string s, with all leading
   870	// and trailing white space removed, as defined by Unicode.
   871	func TrimSpace(s string) string {
   872		// Fast path for ASCII: look for the first ASCII non-space byte
   873		start := 0
   874		for ; start < len(s); start++ {
   875			c := s[start]
   876			if c >= utf8.RuneSelf {
   877				// If we run into a non-ASCII byte, fall back to the
   878				// slower unicode-aware method on the remaining bytes
   879				return TrimFunc(s[start:], unicode.IsSpace)
   880			}
   881			if asciiSpace[c] == 0 {
   882				break
   883			}
   884		}
   885	
   886		// Now look for the first ASCII non-space byte from the end
   887		stop := len(s)
   888		for ; stop > start; stop-- {
   889			c := s[stop-1]
   890			if c >= utf8.RuneSelf {
   891				return TrimFunc(s[start:stop], unicode.IsSpace)
   892			}
   893			if asciiSpace[c] == 0 {
   894				break
   895			}
   896		}
   897	
   898		// At this point s[start:stop] starts and ends with an ASCII
   899		// non-space bytes, so we're done. Non-ASCII cases have already
   900		// been handled above.
   901		return s[start:stop]
   902	}
   903	
   904	// TrimPrefix returns s without the provided leading prefix string.
   905	// If s doesn't start with prefix, s is returned unchanged.
   906	func TrimPrefix(s, prefix string) string {
   907		if HasPrefix(s, prefix) {
   908			return s[len(prefix):]
   909		}
   910		return s
   911	}
   912	
   913	// TrimSuffix returns s without the provided trailing suffix string.
   914	// If s doesn't end with suffix, s is returned unchanged.
   915	func TrimSuffix(s, suffix string) string {
   916		if HasSuffix(s, suffix) {
   917			return s[:len(s)-len(suffix)]
   918		}
   919		return s
   920	}
   921	
   922	// Replace returns a copy of the string s with the first n
   923	// non-overlapping instances of old replaced by new.
   924	// If old is empty, it matches at the beginning of the string
   925	// and after each UTF-8 sequence, yielding up to k+1 replacements
   926	// for a k-rune string.
   927	// If n < 0, there is no limit on the number of replacements.
   928	func Replace(s, old, new string, n int) string {
   929		if old == new || n == 0 {
   930			return s // avoid allocation
   931		}
   932	
   933		// Compute number of replacements.
   934		if m := Count(s, old); m == 0 {
   935			return s // avoid allocation
   936		} else if n < 0 || m < n {
   937			n = m
   938		}
   939	
   940		// Apply replacements to buffer.
   941		t := make([]byte, len(s)+n*(len(new)-len(old)))
   942		w := 0
   943		start := 0
   944		for i := 0; i < n; i++ {
   945			j := start
   946			if len(old) == 0 {
   947				if i > 0 {
   948					_, wid := utf8.DecodeRuneInString(s[start:])
   949					j += wid
   950				}
   951			} else {
   952				j += Index(s[start:], old)
   953			}
   954			w += copy(t[w:], s[start:j])
   955			w += copy(t[w:], new)
   956			start = j + len(old)
   957		}
   958		w += copy(t[w:], s[start:])
   959		return string(t[0:w])
   960	}
   961	
   962	// ReplaceAll returns a copy of the string s with all
   963	// non-overlapping instances of old replaced by new.
   964	// If old is empty, it matches at the beginning of the string
   965	// and after each UTF-8 sequence, yielding up to k+1 replacements
   966	// for a k-rune string.
   967	func ReplaceAll(s, old, new string) string {
   968		return Replace(s, old, new, -1)
   969	}
   970	
   971	// EqualFold reports whether s and t, interpreted as UTF-8 strings,
   972	// are equal under Unicode case-folding.
   973	func EqualFold(s, t string) bool {
   974		for s != "" && t != "" {
   975			// Extract first rune from each string.
   976			var sr, tr rune
   977			if s[0] < utf8.RuneSelf {
   978				sr, s = rune(s[0]), s[1:]
   979			} else {
   980				r, size := utf8.DecodeRuneInString(s)
   981				sr, s = r, s[size:]
   982			}
   983			if t[0] < utf8.RuneSelf {
   984				tr, t = rune(t[0]), t[1:]
   985			} else {
   986				r, size := utf8.DecodeRuneInString(t)
   987				tr, t = r, t[size:]
   988			}
   989	
   990			// If they match, keep going; if not, return false.
   991	
   992			// Easy case.
   993			if tr == sr {
   994				continue
   995			}
   996	
   997			// Make sr < tr to simplify what follows.
   998			if tr < sr {
   999				tr, sr = sr, tr
  1000			}
  1001			// Fast check for ASCII.
  1002			if tr < utf8.RuneSelf {
  1003				// ASCII only, sr/tr must be upper/lower case
  1004				if 'A' <= sr && sr <= 'Z' && tr == sr+'a'-'A' {
  1005					continue
  1006				}
  1007				return false
  1008			}
  1009	
  1010			// General case. SimpleFold(x) returns the next equivalent rune > x
  1011			// or wraps around to smaller values.
  1012			r := unicode.SimpleFold(sr)
  1013			for r != sr && r < tr {
  1014				r = unicode.SimpleFold(r)
  1015			}
  1016			if r == tr {
  1017				continue
  1018			}
  1019			return false
  1020		}
  1021	
  1022		// One string is empty. Are both?
  1023		return s == t
  1024	}
  1025	
  1026	// Index returns the index of the first instance of substr in s, or -1 if substr is not present in s.
  1027	func Index(s, substr string) int {
  1028		n := len(substr)
  1029		switch {
  1030		case n == 0:
  1031			return 0
  1032		case n == 1:
  1033			return IndexByte(s, substr[0])
  1034		case n == len(s):
  1035			if substr == s {
  1036				return 0
  1037			}
  1038			return -1
  1039		case n > len(s):
  1040			return -1
  1041		case n <= bytealg.MaxLen:
  1042			// Use brute force when s and substr both are small
  1043			if len(s) <= bytealg.MaxBruteForce {
  1044				return bytealg.IndexString(s, substr)
  1045			}
  1046			c0 := substr[0]
  1047			c1 := substr[1]
  1048			i := 0
  1049			t := len(s) - n + 1
  1050			fails := 0
  1051			for i < t {
  1052				if s[i] != c0 {
  1053					// IndexByte is faster than bytealg.IndexString, so use it as long as
  1054					// we're not getting lots of false positives.
  1055					o := IndexByte(s[i:t], c0)
  1056					if o < 0 {
  1057						return -1
  1058					}
  1059					i += o
  1060				}
  1061				if s[i+1] == c1 && s[i:i+n] == substr {
  1062					return i
  1063				}
  1064				fails++
  1065				i++
  1066				// Switch to bytealg.IndexString when IndexByte produces too many false positives.
  1067				if fails > bytealg.Cutover(i) {
  1068					r := bytealg.IndexString(s[i:], substr)
  1069					if r >= 0 {
  1070						return r + i
  1071					}
  1072					return -1
  1073				}
  1074			}
  1075			return -1
  1076		}
  1077		c0 := substr[0]
  1078		c1 := substr[1]
  1079		i := 0
  1080		t := len(s) - n + 1
  1081		fails := 0
  1082		for i < t {
  1083			if s[i] != c0 {
  1084				o := IndexByte(s[i:t], c0)
  1085				if o < 0 {
  1086					return -1
  1087				}
  1088				i += o
  1089			}
  1090			if s[i+1] == c1 && s[i:i+n] == substr {
  1091				return i
  1092			}
  1093			i++
  1094			fails++
  1095			if fails >= 4+i>>4 && i < t {
  1096				// See comment in ../bytes/bytes_generic.go.
  1097				j := indexRabinKarp(s[i:], substr)
  1098				if j < 0 {
  1099					return -1
  1100				}
  1101				return i + j
  1102			}
  1103		}
  1104		return -1
  1105	}
  1106	
  1107	func indexRabinKarp(s, substr string) int {
  1108		// Rabin-Karp search
  1109		hashss, pow := hashStr(substr)
  1110		n := len(substr)
  1111		var h uint32
  1112		for i := 0; i < n; i++ {
  1113			h = h*primeRK + uint32(s[i])
  1114		}
  1115		if h == hashss && s[:n] == substr {
  1116			return 0
  1117		}
  1118		for i := n; i < len(s); {
  1119			h *= primeRK
  1120			h += uint32(s[i])
  1121			h -= pow * uint32(s[i-n])
  1122			i++
  1123			if h == hashss && s[i-n:i] == substr {
  1124				return i - n
  1125			}
  1126		}
  1127		return -1
  1128	}
  1129
View as plain text