...

Source file src/encoding/json/fold.go

     1	// Copyright 2013 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	package json
     6	
     7	import (
     8		"bytes"
     9		"unicode/utf8"
    10	)
    11	
    12	const (
    13		caseMask     = ^byte(0x20) // Mask to ignore case in ASCII.
    14		kelvin       = '\u212a'
    15		smallLongEss = '\u017f'
    16	)
    17	
    18	// foldFunc returns one of four different case folding equivalence
    19	// functions, from most general (and slow) to fastest:
    20	//
    21	// 1) bytes.EqualFold, if the key s contains any non-ASCII UTF-8
    22	// 2) equalFoldRight, if s contains special folding ASCII ('k', 'K', 's', 'S')
    23	// 3) asciiEqualFold, no special, but includes non-letters (including _)
    24	// 4) simpleLetterEqualFold, no specials, no non-letters.
    25	//
    26	// The letters S and K are special because they map to 3 runes, not just 2:
    27	//  * S maps to s and to U+017F 'ſ' Latin small letter long s
    28	//  * k maps to K and to U+212A 'K' Kelvin sign
    29	// See https://play.golang.org/p/tTxjOc0OGo
    30	//
    31	// The returned function is specialized for matching against s and
    32	// should only be given s. It's not curried for performance reasons.
    33	func foldFunc(s []byte) func(s, t []byte) bool {
    34		nonLetter := false
    35		special := false // special letter
    36		for _, b := range s {
    37			if b >= utf8.RuneSelf {
    38				return bytes.EqualFold
    39			}
    40			upper := b & caseMask
    41			if upper < 'A' || upper > 'Z' {
    42				nonLetter = true
    43			} else if upper == 'K' || upper == 'S' {
    44				// See above for why these letters are special.
    45				special = true
    46			}
    47		}
    48		if special {
    49			return equalFoldRight
    50		}
    51		if nonLetter {
    52			return asciiEqualFold
    53		}
    54		return simpleLetterEqualFold
    55	}
    56	
    57	// equalFoldRight is a specialization of bytes.EqualFold when s is
    58	// known to be all ASCII (including punctuation), but contains an 's',
    59	// 'S', 'k', or 'K', requiring a Unicode fold on the bytes in t.
    60	// See comments on foldFunc.
    61	func equalFoldRight(s, t []byte) bool {
    62		for _, sb := range s {
    63			if len(t) == 0 {
    64				return false
    65			}
    66			tb := t[0]
    67			if tb < utf8.RuneSelf {
    68				if sb != tb {
    69					sbUpper := sb & caseMask
    70					if 'A' <= sbUpper && sbUpper <= 'Z' {
    71						if sbUpper != tb&caseMask {
    72							return false
    73						}
    74					} else {
    75						return false
    76					}
    77				}
    78				t = t[1:]
    79				continue
    80			}
    81			// sb is ASCII and t is not. t must be either kelvin
    82			// sign or long s; sb must be s, S, k, or K.
    83			tr, size := utf8.DecodeRune(t)
    84			switch sb {
    85			case 's', 'S':
    86				if tr != smallLongEss {
    87					return false
    88				}
    89			case 'k', 'K':
    90				if tr != kelvin {
    91					return false
    92				}
    93			default:
    94				return false
    95			}
    96			t = t[size:]
    97	
    98		}
    99		if len(t) > 0 {
   100			return false
   101		}
   102		return true
   103	}
   104	
   105	// asciiEqualFold is a specialization of bytes.EqualFold for use when
   106	// s is all ASCII (but may contain non-letters) and contains no
   107	// special-folding letters.
   108	// See comments on foldFunc.
   109	func asciiEqualFold(s, t []byte) bool {
   110		if len(s) != len(t) {
   111			return false
   112		}
   113		for i, sb := range s {
   114			tb := t[i]
   115			if sb == tb {
   116				continue
   117			}
   118			if ('a' <= sb && sb <= 'z') || ('A' <= sb && sb <= 'Z') {
   119				if sb&caseMask != tb&caseMask {
   120					return false
   121				}
   122			} else {
   123				return false
   124			}
   125		}
   126		return true
   127	}
   128	
   129	// simpleLetterEqualFold is a specialization of bytes.EqualFold for
   130	// use when s is all ASCII letters (no underscores, etc) and also
   131	// doesn't contain 'k', 'K', 's', or 'S'.
   132	// See comments on foldFunc.
   133	func simpleLetterEqualFold(s, t []byte) bool {
   134		if len(s) != len(t) {
   135			return false
   136		}
   137		for i, b := range s {
   138			if b&caseMask != t[i]&caseMask {
   139				return false
   140			}
   141		}
   142		return true
   143	}
   144	

View as plain text