...

Source file src/pkg/vendor/golang.org/x/text/unicode/bidi/bracket.go

     1	// Copyright 2015 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	package bidi
     6	
     7	import (
     8		"container/list"
     9		"fmt"
    10		"sort"
    11	)
    12	
    13	// This file contains a port of the reference implementation of the
    14	// Bidi Parentheses Algorithm:
    15	// https://www.unicode.org/Public/PROGRAMS/BidiReferenceJava/BidiPBAReference.java
    16	//
    17	// The implementation in this file covers definitions BD14-BD16 and rule N0
    18	// of UAX#9.
    19	//
    20	// Some preprocessing is done for each rune before data is passed to this
    21	// algorithm:
    22	//  - opening and closing brackets are identified
    23	//  - a bracket pair type, like '(' and ')' is assigned a unique identifier that
    24	//    is identical for the opening and closing bracket. It is left to do these
    25	//    mappings.
    26	//  - The BPA algorithm requires that bracket characters that are canonical
    27	//    equivalents of each other be able to be substituted for each other.
    28	//    It is the responsibility of the caller to do this canonicalization.
    29	//
    30	// In implementing BD16, this implementation departs slightly from the "logical"
    31	// algorithm defined in UAX#9. In particular, the stack referenced there
    32	// supports operations that go beyond a "basic" stack. An equivalent
    33	// implementation based on a linked list is used here.
    34	
    35	// Bidi_Paired_Bracket_Type
    36	// BD14. An opening paired bracket is a character whose
    37	// Bidi_Paired_Bracket_Type property value is Open.
    38	//
    39	// BD15. A closing paired bracket is a character whose
    40	// Bidi_Paired_Bracket_Type property value is Close.
    41	type bracketType byte
    42	
    43	const (
    44		bpNone bracketType = iota
    45		bpOpen
    46		bpClose
    47	)
    48	
    49	// bracketPair holds a pair of index values for opening and closing bracket
    50	// location of a bracket pair.
    51	type bracketPair struct {
    52		opener int
    53		closer int
    54	}
    55	
    56	func (b *bracketPair) String() string {
    57		return fmt.Sprintf("(%v, %v)", b.opener, b.closer)
    58	}
    59	
    60	// bracketPairs is a slice of bracketPairs with a sort.Interface implementation.
    61	type bracketPairs []bracketPair
    62	
    63	func (b bracketPairs) Len() int           { return len(b) }
    64	func (b bracketPairs) Swap(i, j int)      { b[i], b[j] = b[j], b[i] }
    65	func (b bracketPairs) Less(i, j int) bool { return b[i].opener < b[j].opener }
    66	
    67	// resolvePairedBrackets runs the paired bracket part of the UBA algorithm.
    68	//
    69	// For each rune, it takes the indexes into the original string, the class the
    70	// bracket type (in pairTypes) and the bracket identifier (pairValues). It also
    71	// takes the direction type for the start-of-sentence and the embedding level.
    72	//
    73	// The identifiers for bracket types are the rune of the canonicalized opening
    74	// bracket for brackets (open or close) or 0 for runes that are not brackets.
    75	func resolvePairedBrackets(s *isolatingRunSequence) {
    76		p := bracketPairer{
    77			sos:              s.sos,
    78			openers:          list.New(),
    79			codesIsolatedRun: s.types,
    80			indexes:          s.indexes,
    81		}
    82		dirEmbed := L
    83		if s.level&1 != 0 {
    84			dirEmbed = R
    85		}
    86		p.locateBrackets(s.p.pairTypes, s.p.pairValues)
    87		p.resolveBrackets(dirEmbed, s.p.initialTypes)
    88	}
    89	
    90	type bracketPairer struct {
    91		sos Class // direction corresponding to start of sequence
    92	
    93		// The following is a restatement of BD 16 using non-algorithmic language.
    94		//
    95		// A bracket pair is a pair of characters consisting of an opening
    96		// paired bracket and a closing paired bracket such that the
    97		// Bidi_Paired_Bracket property value of the former equals the latter,
    98		// subject to the following constraints.
    99		// - both characters of a pair occur in the same isolating run sequence
   100		// - the closing character of a pair follows the opening character
   101		// - any bracket character can belong at most to one pair, the earliest possible one
   102		// - any bracket character not part of a pair is treated like an ordinary character
   103		// - pairs may nest properly, but their spans may not overlap otherwise
   104	
   105		// Bracket characters with canonical decompositions are supposed to be
   106		// treated as if they had been normalized, to allow normalized and non-
   107		// normalized text to give the same result. In this implementation that step
   108		// is pushed out to the caller. The caller has to ensure that the pairValue
   109		// slices contain the rune of the opening bracket after normalization for
   110		// any opening or closing bracket.
   111	
   112		openers *list.List // list of positions for opening brackets
   113	
   114		// bracket pair positions sorted by location of opening bracket
   115		pairPositions bracketPairs
   116	
   117		codesIsolatedRun []Class // directional bidi codes for an isolated run
   118		indexes          []int   // array of index values into the original string
   119	
   120	}
   121	
   122	// matchOpener reports whether characters at given positions form a matching
   123	// bracket pair.
   124	func (p *bracketPairer) matchOpener(pairValues []rune, opener, closer int) bool {
   125		return pairValues[p.indexes[opener]] == pairValues[p.indexes[closer]]
   126	}
   127	
   128	const maxPairingDepth = 63
   129	
   130	// locateBrackets locates matching bracket pairs according to BD16.
   131	//
   132	// This implementation uses a linked list instead of a stack, because, while
   133	// elements are added at the front (like a push) they are not generally removed
   134	// in atomic 'pop' operations, reducing the benefit of the stack archetype.
   135	func (p *bracketPairer) locateBrackets(pairTypes []bracketType, pairValues []rune) {
   136		// traverse the run
   137		// do that explicitly (not in a for-each) so we can record position
   138		for i, index := range p.indexes {
   139	
   140			// look at the bracket type for each character
   141			if pairTypes[index] == bpNone || p.codesIsolatedRun[i] != ON {
   142				// continue scanning
   143				continue
   144			}
   145			switch pairTypes[index] {
   146			case bpOpen:
   147				// check if maximum pairing depth reached
   148				if p.openers.Len() == maxPairingDepth {
   149					p.openers.Init()
   150					return
   151				}
   152				// remember opener location, most recent first
   153				p.openers.PushFront(i)
   154	
   155			case bpClose:
   156				// see if there is a match
   157				count := 0
   158				for elem := p.openers.Front(); elem != nil; elem = elem.Next() {
   159					count++
   160					opener := elem.Value.(int)
   161					if p.matchOpener(pairValues, opener, i) {
   162						// if the opener matches, add nested pair to the ordered list
   163						p.pairPositions = append(p.pairPositions, bracketPair{opener, i})
   164						// remove up to and including matched opener
   165						for ; count > 0; count-- {
   166							p.openers.Remove(p.openers.Front())
   167						}
   168						break
   169					}
   170				}
   171				sort.Sort(p.pairPositions)
   172				// if we get here, the closing bracket matched no openers
   173				// and gets ignored
   174			}
   175		}
   176	}
   177	
   178	// Bracket pairs within an isolating run sequence are processed as units so
   179	// that both the opening and the closing paired bracket in a pair resolve to
   180	// the same direction.
   181	//
   182	// N0. Process bracket pairs in an isolating run sequence sequentially in
   183	// the logical order of the text positions of the opening paired brackets
   184	// using the logic given below. Within this scope, bidirectional types EN
   185	// and AN are treated as R.
   186	//
   187	// Identify the bracket pairs in the current isolating run sequence
   188	// according to BD16. For each bracket-pair element in the list of pairs of
   189	// text positions:
   190	//
   191	// a Inspect the bidirectional types of the characters enclosed within the
   192	// bracket pair.
   193	//
   194	// b If any strong type (either L or R) matching the embedding direction is
   195	// found, set the type for both brackets in the pair to match the embedding
   196	// direction.
   197	//
   198	// o [ e ] o -> o e e e o
   199	//
   200	// o [ o e ] -> o e o e e
   201	//
   202	// o [ NI e ] -> o e NI e e
   203	//
   204	// c Otherwise, if a strong type (opposite the embedding direction) is
   205	// found, test for adjacent strong types as follows: 1 First, check
   206	// backwards before the opening paired bracket until the first strong type
   207	// (L, R, or sos) is found. If that first preceding strong type is opposite
   208	// the embedding direction, then set the type for both brackets in the pair
   209	// to that type. 2 Otherwise, set the type for both brackets in the pair to
   210	// the embedding direction.
   211	//
   212	// o [ o ] e -> o o o o e
   213	//
   214	// o [ o NI ] o -> o o o NI o o
   215	//
   216	// e [ o ] o -> e e o e o
   217	//
   218	// e [ o ] e -> e e o e e
   219	//
   220	// e ( o [ o ] NI ) e -> e e o o o o NI e e
   221	//
   222	// d Otherwise, do not set the type for the current bracket pair. Note that
   223	// if the enclosed text contains no strong types the paired brackets will
   224	// both resolve to the same level when resolved individually using rules N1
   225	// and N2.
   226	//
   227	// e ( NI ) o -> e ( NI ) o
   228	
   229	// getStrongTypeN0 maps character's directional code to strong type as required
   230	// by rule N0.
   231	//
   232	// TODO: have separate type for "strong" directionality.
   233	func (p *bracketPairer) getStrongTypeN0(index int) Class {
   234		switch p.codesIsolatedRun[index] {
   235		// in the scope of N0, number types are treated as R
   236		case EN, AN, AL, R:
   237			return R
   238		case L:
   239			return L
   240		default:
   241			return ON
   242		}
   243	}
   244	
   245	// classifyPairContent reports the strong types contained inside a Bracket Pair,
   246	// assuming the given embedding direction.
   247	//
   248	// It returns ON if no strong type is found. If a single strong type is found,
   249	// it returns this type. Otherwise it returns the embedding direction.
   250	//
   251	// TODO: use separate type for "strong" directionality.
   252	func (p *bracketPairer) classifyPairContent(loc bracketPair, dirEmbed Class) Class {
   253		dirOpposite := ON
   254		for i := loc.opener + 1; i < loc.closer; i++ {
   255			dir := p.getStrongTypeN0(i)
   256			if dir == ON {
   257				continue
   258			}
   259			if dir == dirEmbed {
   260				return dir // type matching embedding direction found
   261			}
   262			dirOpposite = dir
   263		}
   264		// return ON if no strong type found, or class opposite to dirEmbed
   265		return dirOpposite
   266	}
   267	
   268	// classBeforePair determines which strong types are present before a Bracket
   269	// Pair. Return R or L if strong type found, otherwise ON.
   270	func (p *bracketPairer) classBeforePair(loc bracketPair) Class {
   271		for i := loc.opener - 1; i >= 0; i-- {
   272			if dir := p.getStrongTypeN0(i); dir != ON {
   273				return dir
   274			}
   275		}
   276		// no strong types found, return sos
   277		return p.sos
   278	}
   279	
   280	// assignBracketType implements rule N0 for a single bracket pair.
   281	func (p *bracketPairer) assignBracketType(loc bracketPair, dirEmbed Class, initialTypes []Class) {
   282		// rule "N0, a", inspect contents of pair
   283		dirPair := p.classifyPairContent(loc, dirEmbed)
   284	
   285		// dirPair is now L, R, or N (no strong type found)
   286	
   287		// the following logical tests are performed out of order compared to
   288		// the statement of the rules but yield the same results
   289		if dirPair == ON {
   290			return // case "d" - nothing to do
   291		}
   292	
   293		if dirPair != dirEmbed {
   294			// case "c": strong type found, opposite - check before (c.1)
   295			dirPair = p.classBeforePair(loc)
   296			if dirPair == dirEmbed || dirPair == ON {
   297				// no strong opposite type found before - use embedding (c.2)
   298				dirPair = dirEmbed
   299			}
   300		}
   301		// else: case "b", strong type found matching embedding,
   302		// no explicit action needed, as dirPair is already set to embedding
   303		// direction
   304	
   305		// set the bracket types to the type found
   306		p.setBracketsToType(loc, dirPair, initialTypes)
   307	}
   308	
   309	func (p *bracketPairer) setBracketsToType(loc bracketPair, dirPair Class, initialTypes []Class) {
   310		p.codesIsolatedRun[loc.opener] = dirPair
   311		p.codesIsolatedRun[loc.closer] = dirPair
   312	
   313		for i := loc.opener + 1; i < loc.closer; i++ {
   314			index := p.indexes[i]
   315			if initialTypes[index] != NSM {
   316				break
   317			}
   318			p.codesIsolatedRun[i] = dirPair
   319		}
   320	
   321		for i := loc.closer + 1; i < len(p.indexes); i++ {
   322			index := p.indexes[i]
   323			if initialTypes[index] != NSM {
   324				break
   325			}
   326			p.codesIsolatedRun[i] = dirPair
   327		}
   328	}
   329	
   330	// resolveBrackets implements rule N0 for a list of pairs.
   331	func (p *bracketPairer) resolveBrackets(dirEmbed Class, initialTypes []Class) {
   332		for _, loc := range p.pairPositions {
   333			p.assignBracketType(loc, dirEmbed, initialTypes)
   334		}
   335	}
   336	

View as plain text