...

Source file src/pkg/unicode/graphic.go

     1	// Copyright 2011 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	package unicode
     6	
     7	// Bit masks for each code point under U+0100, for fast lookup.
     8	const (
     9		pC     = 1 << iota // a control character.
    10		pP                 // a punctuation character.
    11		pN                 // a numeral.
    12		pS                 // a symbolic character.
    13		pZ                 // a spacing character.
    14		pLu                // an upper-case letter.
    15		pLl                // a lower-case letter.
    16		pp                 // a printable character according to Go's definition.
    17		pg     = pp | pZ   // a graphical character according to the Unicode definition.
    18		pLo    = pLl | pLu // a letter that is neither upper nor lower case.
    19		pLmask = pLo
    20	)
    21	
    22	// GraphicRanges defines the set of graphic characters according to Unicode.
    23	var GraphicRanges = []*RangeTable{
    24		L, M, N, P, S, Zs,
    25	}
    26	
    27	// PrintRanges defines the set of printable characters according to Go.
    28	// ASCII space, U+0020, is handled separately.
    29	var PrintRanges = []*RangeTable{
    30		L, M, N, P, S,
    31	}
    32	
    33	// IsGraphic reports whether the rune is defined as a Graphic by Unicode.
    34	// Such characters include letters, marks, numbers, punctuation, symbols, and
    35	// spaces, from categories L, M, N, P, S, Zs.
    36	func IsGraphic(r rune) bool {
    37		// We convert to uint32 to avoid the extra test for negative,
    38		// and in the index we convert to uint8 to avoid the range check.
    39		if uint32(r) <= MaxLatin1 {
    40			return properties[uint8(r)]&pg != 0
    41		}
    42		return In(r, GraphicRanges...)
    43	}
    44	
    45	// IsPrint reports whether the rune is defined as printable by Go. Such
    46	// characters include letters, marks, numbers, punctuation, symbols, and the
    47	// ASCII space character, from categories L, M, N, P, S and the ASCII space
    48	// character. This categorization is the same as IsGraphic except that the
    49	// only spacing character is ASCII space, U+0020.
    50	func IsPrint(r rune) bool {
    51		if uint32(r) <= MaxLatin1 {
    52			return properties[uint8(r)]&pp != 0
    53		}
    54		return In(r, PrintRanges...)
    55	}
    56	
    57	// IsOneOf reports whether the rune is a member of one of the ranges.
    58	// The function "In" provides a nicer signature and should be used in preference to IsOneOf.
    59	func IsOneOf(ranges []*RangeTable, r rune) bool {
    60		for _, inside := range ranges {
    61			if Is(inside, r) {
    62				return true
    63			}
    64		}
    65		return false
    66	}
    67	
    68	// In reports whether the rune is a member of one of the ranges.
    69	func In(r rune, ranges ...*RangeTable) bool {
    70		for _, inside := range ranges {
    71			if Is(inside, r) {
    72				return true
    73			}
    74		}
    75		return false
    76	}
    77	
    78	// IsControl reports whether the rune is a control character.
    79	// The C (Other) Unicode category includes more code points
    80	// such as surrogates; use Is(C, r) to test for them.
    81	func IsControl(r rune) bool {
    82		if uint32(r) <= MaxLatin1 {
    83			return properties[uint8(r)]&pC != 0
    84		}
    85		// All control characters are < MaxLatin1.
    86		return false
    87	}
    88	
    89	// IsLetter reports whether the rune is a letter (category L).
    90	func IsLetter(r rune) bool {
    91		if uint32(r) <= MaxLatin1 {
    92			return properties[uint8(r)]&(pLmask) != 0
    93		}
    94		return isExcludingLatin(Letter, r)
    95	}
    96	
    97	// IsMark reports whether the rune is a mark character (category M).
    98	func IsMark(r rune) bool {
    99		// There are no mark characters in Latin-1.
   100		return isExcludingLatin(Mark, r)
   101	}
   102	
   103	// IsNumber reports whether the rune is a number (category N).
   104	func IsNumber(r rune) bool {
   105		if uint32(r) <= MaxLatin1 {
   106			return properties[uint8(r)]&pN != 0
   107		}
   108		return isExcludingLatin(Number, r)
   109	}
   110	
   111	// IsPunct reports whether the rune is a Unicode punctuation character
   112	// (category P).
   113	func IsPunct(r rune) bool {
   114		if uint32(r) <= MaxLatin1 {
   115			return properties[uint8(r)]&pP != 0
   116		}
   117		return Is(Punct, r)
   118	}
   119	
   120	// IsSpace reports whether the rune is a space character as defined
   121	// by Unicode's White Space property; in the Latin-1 space
   122	// this is
   123	//	'\t', '\n', '\v', '\f', '\r', ' ', U+0085 (NEL), U+00A0 (NBSP).
   124	// Other definitions of spacing characters are set by category
   125	// Z and property Pattern_White_Space.
   126	func IsSpace(r rune) bool {
   127		// This property isn't the same as Z; special-case it.
   128		if uint32(r) <= MaxLatin1 {
   129			switch r {
   130			case '\t', '\n', '\v', '\f', '\r', ' ', 0x85, 0xA0:
   131				return true
   132			}
   133			return false
   134		}
   135		return isExcludingLatin(White_Space, r)
   136	}
   137	
   138	// IsSymbol reports whether the rune is a symbolic character.
   139	func IsSymbol(r rune) bool {
   140		if uint32(r) <= MaxLatin1 {
   141			return properties[uint8(r)]&pS != 0
   142		}
   143		return isExcludingLatin(Symbol, r)
   144	}
   145	

View as plain text