...

Source file src/pkg/vendor/golang.org/x/text/unicode/bidi/prop.go

     1	// Copyright 2016 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	package bidi
     6	
     7	import "unicode/utf8"
     8	
     9	// Properties provides access to BiDi properties of runes.
    10	type Properties struct {
    11		entry uint8
    12		last  uint8
    13	}
    14	
// trie is the package-level BiDi trie. Lookup and LookupString call its
// lookupValue method to fetch the value for the final byte of a multi-byte
// UTF-8 encoding.
// NOTE(review): newBidiTrie is defined elsewhere (presumably in generated
// code); the meaning of its 0 argument is not visible from this file.
var trie = newBidiTrie(0)
    16	
    17	// TODO: using this for bidirule reduces the running time by about 5%. Consider
    18	// if this is worth exposing or if we can find a way to speed up the Class
    19	// method.
    20	//
    21	// // CompactClass is like Class, but maps all of the BiDi control classes
    22	// // (LRO, RLO, LRE, RLE, PDF, LRI, RLI, FSI, PDI) to the class Control.
    23	// func (p Properties) CompactClass() Class {
    24	// 	return Class(p.entry & 0x0F)
    25	// }
    26	
    27	// Class returns the Bidi class for p.
    28	func (p Properties) Class() Class {
    29		c := Class(p.entry & 0x0F)
    30		if c == Control {
    31			c = controlByteToClass[p.last&0xF]
    32		}
    33		return c
    34	}
    35	
    36	// IsBracket reports whether the rune is a bracket.
    37	func (p Properties) IsBracket() bool { return p.entry&0xF0 != 0 }
    38	
    39	// IsOpeningBracket reports whether the rune is an opening bracket.
    40	// IsBracket must return true.
    41	func (p Properties) IsOpeningBracket() bool { return p.entry&openMask != 0 }
    42	
    43	// TODO: find a better API and expose.
    44	func (p Properties) reverseBracket(r rune) rune {
    45		return xorMasks[p.entry>>xorMaskShift] ^ r
    46	}
    47	
    48	var controlByteToClass = [16]Class{
    49		0xD: LRO, // U+202D LeftToRightOverride,
    50		0xE: RLO, // U+202E RightToLeftOverride,
    51		0xA: LRE, // U+202A LeftToRightEmbedding,
    52		0xB: RLE, // U+202B RightToLeftEmbedding,
    53		0xC: PDF, // U+202C PopDirectionalFormat,
    54		0x6: LRI, // U+2066 LeftToRightIsolate,
    55		0x7: RLI, // U+2067 RightToLeftIsolate,
    56		0x8: FSI, // U+2068 FirstStrongIsolate,
    57		0x9: PDI, // U+2069 PopDirectionalIsolate,
    58	}
    59	
    60	// LookupRune returns properties for r.
    61	func LookupRune(r rune) (p Properties, size int) {
    62		var buf [4]byte
    63		n := utf8.EncodeRune(buf[:], r)
    64		return Lookup(buf[:n])
    65	}
    66	
    67	// TODO: these lookup methods are based on the generated trie code. The returned
    68	// sizes have slightly different semantics from the generated code, in that it
    69	// always returns size==1 for an illegal UTF-8 byte (instead of the length
    70	// of the maximum invalid subsequence). Most Transformers, like unicode/norm,
    71	// leave invalid UTF-8 untouched, in which case it has performance benefits to
    72	// do so (without changing the semantics). Bidi requires the semantics used here
    73	// for the bidirule implementation to be compatible with the Go semantics.
// These semantics should perhaps ultimately be adopted by all trie
// implementations, for convenience's sake.
    76	// This unrolled code also boosts performance of the secure/bidirule package by
    77	// about 30%.
    78	// So, to remove this code:
    79	//   - add option to trie generator to define return type.
    80	//   - always return 1 byte size for ill-formed UTF-8 runes.
    81	
    82	// Lookup returns properties for the first rune in s and the width in bytes of
    83	// its encoding. The size will be 0 if s does not hold enough bytes to complete
    84	// the encoding.
    85	func Lookup(s []byte) (p Properties, sz int) {
    86		c0 := s[0]
    87		switch {
    88		case c0 < 0x80: // is ASCII
    89			return Properties{entry: bidiValues[c0]}, 1
    90		case c0 < 0xC2:
    91			return Properties{}, 1
    92		case c0 < 0xE0: // 2-byte UTF-8
    93			if len(s) < 2 {
    94				return Properties{}, 0
    95			}
    96			i := bidiIndex[c0]
    97			c1 := s[1]
    98			if c1 < 0x80 || 0xC0 <= c1 {
    99				return Properties{}, 1
   100			}
   101			return Properties{entry: trie.lookupValue(uint32(i), c1)}, 2
   102		case c0 < 0xF0: // 3-byte UTF-8
   103			if len(s) < 3 {
   104				return Properties{}, 0
   105			}
   106			i := bidiIndex[c0]
   107			c1 := s[1]
   108			if c1 < 0x80 || 0xC0 <= c1 {
   109				return Properties{}, 1
   110			}
   111			o := uint32(i)<<6 + uint32(c1)
   112			i = bidiIndex[o]
   113			c2 := s[2]
   114			if c2 < 0x80 || 0xC0 <= c2 {
   115				return Properties{}, 1
   116			}
   117			return Properties{entry: trie.lookupValue(uint32(i), c2), last: c2}, 3
   118		case c0 < 0xF8: // 4-byte UTF-8
   119			if len(s) < 4 {
   120				return Properties{}, 0
   121			}
   122			i := bidiIndex[c0]
   123			c1 := s[1]
   124			if c1 < 0x80 || 0xC0 <= c1 {
   125				return Properties{}, 1
   126			}
   127			o := uint32(i)<<6 + uint32(c1)
   128			i = bidiIndex[o]
   129			c2 := s[2]
   130			if c2 < 0x80 || 0xC0 <= c2 {
   131				return Properties{}, 1
   132			}
   133			o = uint32(i)<<6 + uint32(c2)
   134			i = bidiIndex[o]
   135			c3 := s[3]
   136			if c3 < 0x80 || 0xC0 <= c3 {
   137				return Properties{}, 1
   138			}
   139			return Properties{entry: trie.lookupValue(uint32(i), c3)}, 4
   140		}
   141		// Illegal rune
   142		return Properties{}, 1
   143	}
   144	
   145	// LookupString returns properties for the first rune in s and the width in
   146	// bytes of its encoding. The size will be 0 if s does not hold enough bytes to
   147	// complete the encoding.
   148	func LookupString(s string) (p Properties, sz int) {
   149		c0 := s[0]
   150		switch {
   151		case c0 < 0x80: // is ASCII
   152			return Properties{entry: bidiValues[c0]}, 1
   153		case c0 < 0xC2:
   154			return Properties{}, 1
   155		case c0 < 0xE0: // 2-byte UTF-8
   156			if len(s) < 2 {
   157				return Properties{}, 0
   158			}
   159			i := bidiIndex[c0]
   160			c1 := s[1]
   161			if c1 < 0x80 || 0xC0 <= c1 {
   162				return Properties{}, 1
   163			}
   164			return Properties{entry: trie.lookupValue(uint32(i), c1)}, 2
   165		case c0 < 0xF0: // 3-byte UTF-8
   166			if len(s) < 3 {
   167				return Properties{}, 0
   168			}
   169			i := bidiIndex[c0]
   170			c1 := s[1]
   171			if c1 < 0x80 || 0xC0 <= c1 {
   172				return Properties{}, 1
   173			}
   174			o := uint32(i)<<6 + uint32(c1)
   175			i = bidiIndex[o]
   176			c2 := s[2]
   177			if c2 < 0x80 || 0xC0 <= c2 {
   178				return Properties{}, 1
   179			}
   180			return Properties{entry: trie.lookupValue(uint32(i), c2), last: c2}, 3
   181		case c0 < 0xF8: // 4-byte UTF-8
   182			if len(s) < 4 {
   183				return Properties{}, 0
   184			}
   185			i := bidiIndex[c0]
   186			c1 := s[1]
   187			if c1 < 0x80 || 0xC0 <= c1 {
   188				return Properties{}, 1
   189			}
   190			o := uint32(i)<<6 + uint32(c1)
   191			i = bidiIndex[o]
   192			c2 := s[2]
   193			if c2 < 0x80 || 0xC0 <= c2 {
   194				return Properties{}, 1
   195			}
   196			o = uint32(i)<<6 + uint32(c2)
   197			i = bidiIndex[o]
   198			c3 := s[3]
   199			if c3 < 0x80 || 0xC0 <= c3 {
   200				return Properties{}, 1
   201			}
   202			return Properties{entry: trie.lookupValue(uint32(i), c3)}, 4
   203		}
   204		// Illegal rune
   205		return Properties{}, 1
   206	}
   207	

View as plain text