Source file src/pkg/vendor/golang.org/x/text/unicode/bidi/core.go

     1	// Copyright 2015 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	package bidi
     6	
     7	import "log"
     8	
     9	// This implementation is a port based on the reference implementation found at:
    10	// https://www.unicode.org/Public/PROGRAMS/BidiReferenceJava/
    11	//
    12	// described in Unicode Bidirectional Algorithm (UAX #9).
    13	//
    14	// Input:
    15	// There are two levels of input to the algorithm, since clients may prefer to
    16	// supply some information from out-of-band sources rather than relying on the
    17	// default behavior.
    18	//
    19	// - Bidi class array
    20	// - Bidi class array, with externally supplied base line direction
    21	//
    22	// Output:
    23	// Output is separated into several stages:
    24	//
    25	//  - levels array over entire paragraph
    26	//  - reordering array over entire paragraph
    27	//  - levels array over line
    28	//  - reordering array over line
    29	//
    30	// Note that for conformance to the Unicode Bidirectional Algorithm,
    31	// implementations are only required to generate correct reordering and
    32	// character directionality (odd or even levels) over a line. Generating
    33	// identical level arrays over a line is not required. Bidi explicit format
    34	// codes (LRE, RLE, LRO, RLO, PDF) and BN can be assigned arbitrary levels and
    35	// positions as long as the rest of the input is properly reordered.
    36	//
    37	// As the algorithm is defined to operate on a single paragraph at a time, this
    38	// implementation is written to handle single paragraphs. Thus rule P1 is
    39	// presumed by this implementation-- the data provided to the implementation is
    40	// assumed to be a single paragraph, and either contains no 'B' codes, or a
    41	// single 'B' code at the end of the input. 'B' is allowed as input to
    42	// illustrate how the algorithm assigns it a level.
    43	//
    44	// Also note that rules L3 and L4 depend on the rendering engine that uses the
    45	// result of the bidi algorithm. This implementation assumes that the rendering
    46	// engine expects combining marks in visual order (e.g. to the left of their
    47	// base character in RTL runs) and that it adjusts the glyphs used to render
    48	// mirrored characters that are in RTL runs so that they render appropriately.
    49	
// level is the embedding level of a character. Even embedding levels indicate
// left-to-right order and odd levels indicate right-to-left order. The special
// level of -1 is reserved for undefined order.
type level int8

// implicitLevel is the sentinel paragraph embedding level meaning that no
// level was supplied by the caller. paragraph.run replaces it with the level
// derived from the text (rules P2 and P3).
const implicitLevel level = -1
    56	
    57	// in returns if x is equal to any of the values in set.
    58	func (c Class) in(set ...Class) bool {
    59		for _, s := range set {
    60			if c == s {
    61				return true
    62			}
    63		}
    64		return false
    65	}
    66	
// A paragraph contains the state of a paragraph: the caller-supplied inputs
// and the per-character results computed by run. All slices are indexed by
// character position in the paragraph.
type paragraph struct {
	// initialTypes is a private copy of the Bidi classes passed to
	// newParagraph; it is never modified by the algorithm.
	initialTypes []Class

	// Arrays of properties needed for paired bracket evaluation in N0
	pairTypes  []bracketType // paired Bracket types for paragraph
	pairValues []rune        // rune for opening bracket or pbOpen and pbClose; 0 for pbNone

	// embeddingLevel is the paragraph embedding level. When it is
	// implicitLevel on entry to run, it is replaced by the level computed
	// from the text.
	embeddingLevel level // default: = implicitLevel;

	// at the paragraph levels
	resultTypes  []Class // working copy of the classes, rewritten as rules are applied
	resultLevels []level // resolved embedding level of every character

	// Index of matching PDI for isolate initiator characters. For other
	// characters, the value of matchingPDI will be set to -1. For isolate
	// initiators with no matching PDI, matchingPDI will be set to the length of
	// the input string.
	matchingPDI []int

	// Index of matching isolate initiator for PDI characters. For other
	// characters, and for PDIs with no matching isolate initiator, the value of
	// matchingIsolateInitiator will be set to -1.
	matchingIsolateInitiator []int
}
    92	
    93	// newParagraph initializes a paragraph. The user needs to supply a few arrays
    94	// corresponding to the preprocessed text input. The types correspond to the
    95	// Unicode BiDi classes for each rune. pairTypes indicates the bracket type for
    96	// each rune. pairValues provides a unique bracket class identifier for each
    97	// rune (suggested is the rune of the open bracket for opening and matching
    98	// close brackets, after normalization). The embedding levels are optional, but
    99	// may be supplied to encode embedding levels of styled text.
   100	//
   101	// TODO: return an error.
   102	func newParagraph(types []Class, pairTypes []bracketType, pairValues []rune, levels level) *paragraph {
   103		validateTypes(types)
   104		validatePbTypes(pairTypes)
   105		validatePbValues(pairValues, pairTypes)
   106		validateParagraphEmbeddingLevel(levels)
   107	
   108		p := &paragraph{
   109			initialTypes:   append([]Class(nil), types...),
   110			embeddingLevel: levels,
   111	
   112			pairTypes:  pairTypes,
   113			pairValues: pairValues,
   114	
   115			resultTypes: append([]Class(nil), types...),
   116		}
   117		p.run()
   118		return p
   119	}
   120	
   121	func (p *paragraph) Len() int { return len(p.initialTypes) }
   122	
// run executes the paragraph-level part of the algorithm and leaves the
// outcome in p.resultTypes and p.resultLevels. It does not include line-based
// processing (Rules L1, L2); these are applied later in the line-based phase
// of the algorithm.
func (p *paragraph) run() {
	// Isolate matching must come first: both the paragraph level computation
	// and the explicit level computation read p.matchingPDI.
	p.determineMatchingIsolates()

	// 1) determining the paragraph level
	// Rule P1 is the requirement for entering this algorithm.
	// Rules P2, P3.
	// If no externally supplied paragraph embedding level, use default.
	if p.embeddingLevel == implicitLevel {
		p.embeddingLevel = p.determineParagraphEmbeddingLevel(0, p.Len())
	}

	// Initialize result levels to paragraph embedding level.
	p.resultLevels = make([]level, p.Len())
	setLevels(p.resultLevels, p.embeddingLevel)

	// 2) Explicit levels and directions
	// Rules X1-X8.
	p.determineExplicitEmbeddingLevels()

	// Rule X9.
	// We do not remove the embeddings, the overrides, the PDFs, and the BNs
	// from the string explicitly. But they are not copied into isolating run
	// sequences when they are created, so they are removed for all
	// practical purposes.

	// Rule X10.
	// Run remainder of algorithm one isolating run sequence at a time
	for _, seq := range p.determineIsolatingRunSequences() {
		// 3) resolving weak types
		// Rules W1-W7.
		seq.resolveWeakTypes()

		// 4a) resolving paired brackets
		// Rule N0
		resolvePairedBrackets(seq)

		// 4b) resolving neutral types
		// Rules N1-N3.
		seq.resolveNeutralTypes()

		// 5) resolving implicit embedding levels
		// Rules I1, I2.
		seq.resolveImplicitLevels()

		// Apply the computed levels and types
		seq.applyLevelsAndTypes()
	}

	// Assign appropriate levels to 'hide' LREs, RLEs, LROs, RLOs, PDFs, and
	// BNs. This is for convenience, so the resulting level array will have
	// a value for every character.
	p.assignLevelsToCharactersRemovedByX9()
}
   178	
   179	// determineMatchingIsolates determines the matching PDI for each isolate
   180	// initiator and vice versa.
   181	//
   182	// Definition BD9.
   183	//
   184	// At the end of this function:
   185	//
   186	//  - The member variable matchingPDI is set to point to the index of the
   187	//    matching PDI character for each isolate initiator character. If there is
   188	//    no matching PDI, it is set to the length of the input text. For other
   189	//    characters, it is set to -1.
   190	//  - The member variable matchingIsolateInitiator is set to point to the
   191	//    index of the matching isolate initiator character for each PDI character.
   192	//    If there is no matching isolate initiator, or the character is not a PDI,
   193	//    it is set to -1.
   194	func (p *paragraph) determineMatchingIsolates() {
   195		p.matchingPDI = make([]int, p.Len())
   196		p.matchingIsolateInitiator = make([]int, p.Len())
   197	
   198		for i := range p.matchingIsolateInitiator {
   199			p.matchingIsolateInitiator[i] = -1
   200		}
   201	
   202		for i := range p.matchingPDI {
   203			p.matchingPDI[i] = -1
   204	
   205			if t := p.resultTypes[i]; t.in(LRI, RLI, FSI) {
   206				depthCounter := 1
   207				for j := i + 1; j < p.Len(); j++ {
   208					if u := p.resultTypes[j]; u.in(LRI, RLI, FSI) {
   209						depthCounter++
   210					} else if u == PDI {
   211						if depthCounter--; depthCounter == 0 {
   212							p.matchingPDI[i] = j
   213							p.matchingIsolateInitiator[j] = i
   214							break
   215						}
   216					}
   217				}
   218				if p.matchingPDI[i] == -1 {
   219					p.matchingPDI[i] = p.Len()
   220				}
   221			}
   222		}
   223	}
   224	
   225	// determineParagraphEmbeddingLevel reports the resolved paragraph direction of
   226	// the substring limited by the given range [start, end).
   227	//
   228	// Determines the paragraph level based on rules P2, P3. This is also used
   229	// in rule X5c to find if an FSI should resolve to LRI or RLI.
   230	func (p *paragraph) determineParagraphEmbeddingLevel(start, end int) level {
   231		var strongType Class = unknownClass
   232	
   233		// Rule P2.
   234		for i := start; i < end; i++ {
   235			if t := p.resultTypes[i]; t.in(L, AL, R) {
   236				strongType = t
   237				break
   238			} else if t.in(FSI, LRI, RLI) {
   239				i = p.matchingPDI[i] // skip over to the matching PDI
   240				if i > end {
   241					log.Panic("assert (i <= end)")
   242				}
   243			}
   244		}
   245		// Rule P3.
   246		switch strongType {
   247		case unknownClass: // none found
   248			// default embedding level when no strong types found is 0.
   249			return 0
   250		case L:
   251			return 0
   252		default: // AL, R
   253			return 1
   254		}
   255	}
   256	
   257	const maxDepth = 125
   258	
   259	// This stack will store the embedding levels and override and isolated
   260	// statuses
   261	type directionalStatusStack struct {
   262		stackCounter        int
   263		embeddingLevelStack [maxDepth + 1]level
   264		overrideStatusStack [maxDepth + 1]Class
   265		isolateStatusStack  [maxDepth + 1]bool
   266	}
   267	
   268	func (s *directionalStatusStack) empty()     { s.stackCounter = 0 }
   269	func (s *directionalStatusStack) pop()       { s.stackCounter-- }
   270	func (s *directionalStatusStack) depth() int { return s.stackCounter }
   271	
   272	func (s *directionalStatusStack) push(level level, overrideStatus Class, isolateStatus bool) {
   273		s.embeddingLevelStack[s.stackCounter] = level
   274		s.overrideStatusStack[s.stackCounter] = overrideStatus
   275		s.isolateStatusStack[s.stackCounter] = isolateStatus
   276		s.stackCounter++
   277	}
   278	
   279	func (s *directionalStatusStack) lastEmbeddingLevel() level {
   280		return s.embeddingLevelStack[s.stackCounter-1]
   281	}
   282	
   283	func (s *directionalStatusStack) lastDirectionalOverrideStatus() Class {
   284		return s.overrideStatusStack[s.stackCounter-1]
   285	}
   286	
   287	func (s *directionalStatusStack) lastDirectionalIsolateStatus() bool {
   288		return s.isolateStatusStack[s.stackCounter-1]
   289	}
   290	
// determineExplicitEmbeddingLevels determines explicit levels using rules
// X1 - X8.
//
// It walks p.resultTypes once, maintaining a directional status stack and the
// overflow/valid counters used by those rules. For every character it writes
// p.resultLevels, and where a directional override is in effect it replaces
// the entry in p.resultTypes with the override class. It reads p.matchingPDI,
// so determineMatchingIsolates must have run first.
func (p *paragraph) determineExplicitEmbeddingLevels() {
	var stack directionalStatusStack
	var overflowIsolateCount, overflowEmbeddingCount, validIsolateCount int

	// Rule X1.
	stack.push(p.embeddingLevel, ON, false)

	for i, t := range p.resultTypes {
		// Rules X2, X3, X4, X5, X5a, X5b, X5c
		switch t {
		case RLE, LRE, RLO, LRO, RLI, LRI, FSI:
			isIsolate := t.in(RLI, LRI, FSI)
			isRTL := t.in(RLE, RLO, RLI)

			// override if this is an FSI that resolves to RLI
			if t == FSI {
				isRTL = (p.determineParagraphEmbeddingLevel(i+1, p.matchingPDI[i]) == 1)
			}
			// An isolate initiator itself takes the level (and any active
			// override) of the surrounding context, not of the isolate it
			// opens.
			if isIsolate {
				p.resultLevels[i] = stack.lastEmbeddingLevel()
				if stack.lastDirectionalOverrideStatus() != ON {
					p.resultTypes[i] = stack.lastDirectionalOverrideStatus()
				}
			}

			var newLevel level
			if isRTL {
				// least greater odd
				newLevel = (stack.lastEmbeddingLevel() + 1) | 1
			} else {
				// least greater even
				newLevel = (stack.lastEmbeddingLevel() + 2) &^ 1
			}

			if newLevel <= maxDepth && overflowIsolateCount == 0 && overflowEmbeddingCount == 0 {
				if isIsolate {
					validIsolateCount++
				}
				// Push new embedding level, override status, and isolated
				// status.
				// No check for valid stack counter, since the level check
				// suffices.
				switch t {
				case LRO:
					stack.push(newLevel, L, isIsolate)
				case RLO:
					stack.push(newLevel, R, isIsolate)
				default:
					stack.push(newLevel, ON, isIsolate)
				}
				// Not really part of the spec
				if !isIsolate {
					p.resultLevels[i] = newLevel
				}
			} else {
				// This is an invalid explicit formatting character,
				// so apply the "Otherwise" part of rules X2-X5b.
				if isIsolate {
					overflowIsolateCount++
				} else { // !isIsolate
					if overflowIsolateCount == 0 {
						overflowEmbeddingCount++
					}
				}
			}

		// Rule X6a
		case PDI:
			if overflowIsolateCount > 0 {
				overflowIsolateCount--
			} else if validIsolateCount == 0 {
				// do nothing
			} else {
				overflowEmbeddingCount = 0
				// Drop any embeddings/overrides opened inside the isolate,
				// then the isolate entry itself.
				for !stack.lastDirectionalIsolateStatus() {
					stack.pop()
				}
				stack.pop()
				validIsolateCount--
			}
			p.resultLevels[i] = stack.lastEmbeddingLevel()

		// Rule X7
		case PDF:
			// Not really part of the spec
			p.resultLevels[i] = stack.lastEmbeddingLevel()

			if overflowIsolateCount > 0 {
				// do nothing
			} else if overflowEmbeddingCount > 0 {
				overflowEmbeddingCount--
			} else if !stack.lastDirectionalIsolateStatus() && stack.depth() >= 2 {
				// depth() >= 2 keeps the paragraph-level entry pushed for
				// rule X1 on the stack.
				stack.pop()
			}

		case B: // paragraph separator.
			// Rule X8.

			// These values are reset for clarity, in this implementation B
			// can only occur as the last code in the array.
			//
			// NOTE(review): after empty() the stack holds no entries, so any
			// character following B would make the last*() accessors index
			// below the start of their arrays. This relies on B being last.
			stack.empty()
			overflowIsolateCount = 0
			overflowEmbeddingCount = 0
			validIsolateCount = 0
			p.resultLevels[i] = p.embeddingLevel

		default:
			// Rule X6 (presumably): every other character takes the current
			// embedding level and, if an override is active, its direction.
			p.resultLevels[i] = stack.lastEmbeddingLevel()
			if stack.lastDirectionalOverrideStatus() != ON {
				p.resultTypes[i] = stack.lastDirectionalOverrideStatus()
			}
		}
	}
}
   406	
// An isolatingRunSequence holds the working state for one isolating run
// sequence of a paragraph. The weak, neutral and implicit rules operate on
// this state; applyLevelsAndTypes copies the outcome back into the paragraph.
type isolatingRunSequence struct {
	p *paragraph // paragraph this sequence belongs to

	indexes []int // indexes to the original string

	types          []Class // type of each character using the index
	resolvedLevels []level // resolved levels after application of rules
	level          level   // embedding level of the sequence
	sos, eos       Class   // start-of-sequence and end-of-sequence types
}
   417	
   418	func (i *isolatingRunSequence) Len() int { return len(i.indexes) }
   419	
   420	func maxLevel(a, b level) level {
   421		if a > b {
   422			return a
   423		}
   424		return b
   425	}
   426	
   427	// Rule X10, second bullet: Determine the start-of-sequence (sos) and end-of-sequence (eos) types,
   428	// 			 either L or R, for each isolating run sequence.
   429	func (p *paragraph) isolatingRunSequence(indexes []int) *isolatingRunSequence {
   430		length := len(indexes)
   431		types := make([]Class, length)
   432		for i, x := range indexes {
   433			types[i] = p.resultTypes[x]
   434		}
   435	
   436		// assign level, sos and eos
   437		prevChar := indexes[0] - 1
   438		for prevChar >= 0 && isRemovedByX9(p.initialTypes[prevChar]) {
   439			prevChar--
   440		}
   441		prevLevel := p.embeddingLevel
   442		if prevChar >= 0 {
   443			prevLevel = p.resultLevels[prevChar]
   444		}
   445	
   446		var succLevel level
   447		lastType := types[length-1]
   448		if lastType.in(LRI, RLI, FSI) {
   449			succLevel = p.embeddingLevel
   450		} else {
   451			// the first character after the end of run sequence
   452			limit := indexes[length-1] + 1
   453			for ; limit < p.Len() && isRemovedByX9(p.initialTypes[limit]); limit++ {
   454	
   455			}
   456			succLevel = p.embeddingLevel
   457			if limit < p.Len() {
   458				succLevel = p.resultLevels[limit]
   459			}
   460		}
   461		level := p.resultLevels[indexes[0]]
   462		return &isolatingRunSequence{
   463			p:       p,
   464			indexes: indexes,
   465			types:   types,
   466			level:   level,
   467			sos:     typeForLevel(maxLevel(prevLevel, level)),
   468			eos:     typeForLevel(maxLevel(succLevel, level)),
   469		}
   470	}
   471	
   472	// Resolving weak types Rules W1-W7.
   473	//
   474	// Note that some weak types (EN, AN) remain after this processing is
   475	// complete.
   476	func (s *isolatingRunSequence) resolveWeakTypes() {
   477	
   478		// on entry, only these types remain
   479		s.assertOnly(L, R, AL, EN, ES, ET, AN, CS, B, S, WS, ON, NSM, LRI, RLI, FSI, PDI)
   480	
   481		// Rule W1.
   482		// Changes all NSMs.
   483		preceedingCharacterType := s.sos
   484		for i, t := range s.types {
   485			if t == NSM {
   486				s.types[i] = preceedingCharacterType
   487			} else {
   488				if t.in(LRI, RLI, FSI, PDI) {
   489					preceedingCharacterType = ON
   490				}
   491				preceedingCharacterType = t
   492			}
   493		}
   494	
   495		// Rule W2.
   496		// EN does not change at the start of the run, because sos != AL.
   497		for i, t := range s.types {
   498			if t == EN {
   499				for j := i - 1; j >= 0; j-- {
   500					if t := s.types[j]; t.in(L, R, AL) {
   501						if t == AL {
   502							s.types[i] = AN
   503						}
   504						break
   505					}
   506				}
   507			}
   508		}
   509	
   510		// Rule W3.
   511		for i, t := range s.types {
   512			if t == AL {
   513				s.types[i] = R
   514			}
   515		}
   516	
   517		// Rule W4.
   518		// Since there must be values on both sides for this rule to have an
   519		// effect, the scan skips the first and last value.
   520		//
   521		// Although the scan proceeds left to right, and changes the type
   522		// values in a way that would appear to affect the computations
   523		// later in the scan, there is actually no problem. A change in the
   524		// current value can only affect the value to its immediate right,
   525		// and only affect it if it is ES or CS. But the current value can
   526		// only change if the value to its right is not ES or CS. Thus
   527		// either the current value will not change, or its change will have
   528		// no effect on the remainder of the analysis.
   529	
   530		for i := 1; i < s.Len()-1; i++ {
   531			t := s.types[i]
   532			if t == ES || t == CS {
   533				prevSepType := s.types[i-1]
   534				succSepType := s.types[i+1]
   535				if prevSepType == EN && succSepType == EN {
   536					s.types[i] = EN
   537				} else if s.types[i] == CS && prevSepType == AN && succSepType == AN {
   538					s.types[i] = AN
   539				}
   540			}
   541		}
   542	
   543		// Rule W5.
   544		for i, t := range s.types {
   545			if t == ET {
   546				// locate end of sequence
   547				runStart := i
   548				runEnd := s.findRunLimit(runStart, ET)
   549	
   550				// check values at ends of sequence
   551				t := s.sos
   552				if runStart > 0 {
   553					t = s.types[runStart-1]
   554				}
   555				if t != EN {
   556					t = s.eos
   557					if runEnd < len(s.types) {
   558						t = s.types[runEnd]
   559					}
   560				}
   561				if t == EN {
   562					setTypes(s.types[runStart:runEnd], EN)
   563				}
   564				// continue at end of sequence
   565				i = runEnd
   566			}
   567		}
   568	
   569		// Rule W6.
   570		for i, t := range s.types {
   571			if t.in(ES, ET, CS) {
   572				s.types[i] = ON
   573			}
   574		}
   575	
   576		// Rule W7.
   577		for i, t := range s.types {
   578			if t == EN {
   579				// set default if we reach start of run
   580				prevStrongType := s.sos
   581				for j := i - 1; j >= 0; j-- {
   582					t = s.types[j]
   583					if t == L || t == R { // AL's have been changed to R
   584						prevStrongType = t
   585						break
   586					}
   587				}
   588				if prevStrongType == L {
   589					s.types[i] = L
   590				}
   591			}
   592		}
   593	}
   594	
   595	// 6) resolving neutral types Rules N1-N2.
   596	func (s *isolatingRunSequence) resolveNeutralTypes() {
   597	
   598		// on entry, only these types can be in resultTypes
   599		s.assertOnly(L, R, EN, AN, B, S, WS, ON, RLI, LRI, FSI, PDI)
   600	
   601		for i, t := range s.types {
   602			switch t {
   603			case WS, ON, B, S, RLI, LRI, FSI, PDI:
   604				// find bounds of run of neutrals
   605				runStart := i
   606				runEnd := s.findRunLimit(runStart, B, S, WS, ON, RLI, LRI, FSI, PDI)
   607	
   608				// determine effective types at ends of run
   609				var leadType, trailType Class
   610	
   611				// Note that the character found can only be L, R, AN, or
   612				// EN.
   613				if runStart == 0 {
   614					leadType = s.sos
   615				} else {
   616					leadType = s.types[runStart-1]
   617					if leadType.in(AN, EN) {
   618						leadType = R
   619					}
   620				}
   621				if runEnd == len(s.types) {
   622					trailType = s.eos
   623				} else {
   624					trailType = s.types[runEnd]
   625					if trailType.in(AN, EN) {
   626						trailType = R
   627					}
   628				}
   629	
   630				var resolvedType Class
   631				if leadType == trailType {
   632					// Rule N1.
   633					resolvedType = leadType
   634				} else {
   635					// Rule N2.
   636					// Notice the embedding level of the run is used, not
   637					// the paragraph embedding level.
   638					resolvedType = typeForLevel(s.level)
   639				}
   640	
   641				setTypes(s.types[runStart:runEnd], resolvedType)
   642	
   643				// skip over run of (former) neutrals
   644				i = runEnd
   645			}
   646		}
   647	}
   648	
   649	func setLevels(levels []level, newLevel level) {
   650		for i := range levels {
   651			levels[i] = newLevel
   652		}
   653	}
   654	
   655	func setTypes(types []Class, newType Class) {
   656		for i := range types {
   657			types[i] = newType
   658		}
   659	}
   660	
   661	// 7) resolving implicit embedding levels Rules I1, I2.
   662	func (s *isolatingRunSequence) resolveImplicitLevels() {
   663	
   664		// on entry, only these types can be in resultTypes
   665		s.assertOnly(L, R, EN, AN)
   666	
   667		s.resolvedLevels = make([]level, len(s.types))
   668		setLevels(s.resolvedLevels, s.level)
   669	
   670		if (s.level & 1) == 0 { // even level
   671			for i, t := range s.types {
   672				// Rule I1.
   673				if t == L {
   674					// no change
   675				} else if t == R {
   676					s.resolvedLevels[i] += 1
   677				} else { // t == AN || t == EN
   678					s.resolvedLevels[i] += 2
   679				}
   680			}
   681		} else { // odd level
   682			for i, t := range s.types {
   683				// Rule I2.
   684				if t == R {
   685					// no change
   686				} else { // t == L || t == AN || t == EN
   687					s.resolvedLevels[i] += 1
   688				}
   689			}
   690		}
   691	}
   692	
   693	// Applies the levels and types resolved in rules W1-I2 to the
   694	// resultLevels array.
   695	func (s *isolatingRunSequence) applyLevelsAndTypes() {
   696		for i, x := range s.indexes {
   697			s.p.resultTypes[x] = s.types[i]
   698			s.p.resultLevels[x] = s.resolvedLevels[i]
   699		}
   700	}
   701	
   702	// Return the limit of the run consisting only of the types in validSet
   703	// starting at index. This checks the value at index, and will return
   704	// index if that value is not in validSet.
   705	func (s *isolatingRunSequence) findRunLimit(index int, validSet ...Class) int {
   706	loop:
   707		for ; index < len(s.types); index++ {
   708			t := s.types[index]
   709			for _, valid := range validSet {
   710				if t == valid {
   711					continue loop
   712				}
   713			}
   714			return index // didn't find a match in validSet
   715		}
   716		return len(s.types)
   717	}
   718	
   719	// Algorithm validation. Assert that all values in types are in the
   720	// provided set.
   721	func (s *isolatingRunSequence) assertOnly(codes ...Class) {
   722	loop:
   723		for i, t := range s.types {
   724			for _, c := range codes {
   725				if t == c {
   726					continue loop
   727				}
   728			}
   729			log.Panicf("invalid bidi code %v present in assertOnly at position %d", t, s.indexes[i])
   730		}
   731	}
   732	
   733	// determineLevelRuns returns an array of level runs. Each level run is
   734	// described as an array of indexes into the input string.
   735	//
   736	// Determines the level runs. Rule X9 will be applied in determining the
   737	// runs, in the way that makes sure the characters that are supposed to be
   738	// removed are not included in the runs.
   739	func (p *paragraph) determineLevelRuns() [][]int {
   740		run := []int{}
   741		allRuns := [][]int{}
   742		currentLevel := implicitLevel
   743	
   744		for i := range p.initialTypes {
   745			if !isRemovedByX9(p.initialTypes[i]) {
   746				if p.resultLevels[i] != currentLevel {
   747					// we just encountered a new run; wrap up last run
   748					if currentLevel >= 0 { // only wrap it up if there was a run
   749						allRuns = append(allRuns, run)
   750						run = nil
   751					}
   752					// Start new run
   753					currentLevel = p.resultLevels[i]
   754				}
   755				run = append(run, i)
   756			}
   757		}
   758		// Wrap up the final run, if any
   759		if len(run) > 0 {
   760			allRuns = append(allRuns, run)
   761		}
   762		return allRuns
   763	}
   764	
   765	// Definition BD13. Determine isolating run sequences.
   766	func (p *paragraph) determineIsolatingRunSequences() []*isolatingRunSequence {
   767		levelRuns := p.determineLevelRuns()
   768	
   769		// Compute the run that each character belongs to
   770		runForCharacter := make([]int, p.Len())
   771		for i, run := range levelRuns {
   772			for _, index := range run {
   773				runForCharacter[index] = i
   774			}
   775		}
   776	
   777		sequences := []*isolatingRunSequence{}
   778	
   779		var currentRunSequence []int
   780	
   781		for _, run := range levelRuns {
   782			first := run[0]
   783			if p.initialTypes[first] != PDI || p.matchingIsolateInitiator[first] == -1 {
   784				currentRunSequence = nil
   785				// int run = i;
   786				for {
   787					// Copy this level run into currentRunSequence
   788					currentRunSequence = append(currentRunSequence, run...)
   789	
   790					last := currentRunSequence[len(currentRunSequence)-1]
   791					lastT := p.initialTypes[last]
   792					if lastT.in(LRI, RLI, FSI) && p.matchingPDI[last] != p.Len() {
   793						run = levelRuns[runForCharacter[p.matchingPDI[last]]]
   794					} else {
   795						break
   796					}
   797				}
   798				sequences = append(sequences, p.isolatingRunSequence(currentRunSequence))
   799			}
   800		}
   801		return sequences
   802	}
   803	
   804	// Assign level information to characters removed by rule X9. This is for
   805	// ease of relating the level information to the original input data. Note
   806	// that the levels assigned to these codes are arbitrary, they're chosen so
   807	// as to avoid breaking level runs.
   808	func (p *paragraph) assignLevelsToCharactersRemovedByX9() {
   809		for i, t := range p.initialTypes {
   810			if t.in(LRE, RLE, LRO, RLO, PDF, BN) {
   811				p.resultTypes[i] = t
   812				p.resultLevels[i] = -1
   813			}
   814		}
   815		// now propagate forward the levels information (could have
   816		// propagated backward, the main thing is not to introduce a level
   817		// break where one doesn't already exist).
   818	
   819		if p.resultLevels[0] == -1 {
   820			p.resultLevels[0] = p.embeddingLevel
   821		}
   822		for i := 1; i < len(p.initialTypes); i++ {
   823			if p.resultLevels[i] == -1 {
   824				p.resultLevels[i] = p.resultLevels[i-1]
   825			}
   826		}
   827		// Embedding information is for informational purposes only so need not be
   828		// adjusted.
   829	}
   830	
   831	//
   832	// Output
   833	//
   834	
   835	// getLevels computes levels array breaking lines at offsets in linebreaks.
   836	// Rule L1.
   837	//
   838	// The linebreaks array must include at least one value. The values must be
   839	// in strictly increasing order (no duplicates) between 1 and the length of
   840	// the text, inclusive. The last value must be the length of the text.
   841	func (p *paragraph) getLevels(linebreaks []int) []level {
   842		// Note that since the previous processing has removed all
   843		// P, S, and WS values from resultTypes, the values referred to
   844		// in these rules are the initial types, before any processing
   845		// has been applied (including processing of overrides).
   846		//
   847		// This example implementation has reinserted explicit format codes
   848		// and BN, in order that the levels array correspond to the
   849		// initial text. Their final placement is not normative.
   850		// These codes are treated like WS in this implementation,
   851		// so they don't interrupt sequences of WS.
   852	
   853		validateLineBreaks(linebreaks, p.Len())
   854	
   855		result := append([]level(nil), p.resultLevels...)
   856	
   857		// don't worry about linebreaks since if there is a break within
   858		// a series of WS values preceding S, the linebreak itself
   859		// causes the reset.
   860		for i, t := range p.initialTypes {
   861			if t.in(B, S) {
   862				// Rule L1, clauses one and two.
   863				result[i] = p.embeddingLevel
   864	
   865				// Rule L1, clause three.
   866				for j := i - 1; j >= 0; j-- {
   867					if isWhitespace(p.initialTypes[j]) { // including format codes
   868						result[j] = p.embeddingLevel
   869					} else {
   870						break
   871					}
   872				}
   873			}
   874		}
   875	
   876		// Rule L1, clause four.
   877		start := 0
   878		for _, limit := range linebreaks {
   879			for j := limit - 1; j >= start; j-- {
   880				if isWhitespace(p.initialTypes[j]) { // including format codes
   881					result[j] = p.embeddingLevel
   882				} else {
   883					break
   884				}
   885			}
   886			start = limit
   887		}
   888	
   889		return result
   890	}
   891	
   892	// getReordering returns the reordering of lines from a visual index to a
   893	// logical index for line breaks at the given offsets.
   894	//
   895	// Lines are concatenated from left to right. So for example, the fifth
   896	// character from the left on the third line is
   897	//
   898	// 		getReordering(linebreaks)[linebreaks[1] + 4]
   899	//
   900	// (linebreaks[1] is the position after the last character of the second
   901	// line, which is also the index of the first character on the third line,
   902	// and adding four gets the fifth character from the left).
   903	//
   904	// The linebreaks array must include at least one value. The values must be
   905	// in strictly increasing order (no duplicates) between 1 and the length of
   906	// the text, inclusive. The last value must be the length of the text.
   907	func (p *paragraph) getReordering(linebreaks []int) []int {
   908		validateLineBreaks(linebreaks, p.Len())
   909	
   910		return computeMultilineReordering(p.getLevels(linebreaks), linebreaks)
   911	}
   912	
   913	// Return multiline reordering array for a given level array. Reordering
   914	// does not occur across a line break.
   915	func computeMultilineReordering(levels []level, linebreaks []int) []int {
   916		result := make([]int, len(levels))
   917	
   918		start := 0
   919		for _, limit := range linebreaks {
   920			tempLevels := make([]level, limit-start)
   921			copy(tempLevels, levels[start:])
   922	
   923			for j, order := range computeReordering(tempLevels) {
   924				result[start+j] = order + start
   925			}
   926			start = limit
   927		}
   928		return result
   929	}
   930	
   931	// Return reordering array for a given level array. This reorders a single
   932	// line. The reordering is a visual to logical map. For example, the
   933	// leftmost char is string.charAt(order[0]). Rule L2.
   934	func computeReordering(levels []level) []int {
   935		result := make([]int, len(levels))
   936		// initialize order
   937		for i := range result {
   938			result[i] = i
   939		}
   940	
   941		// locate highest level found on line.
   942		// Note the rules say text, but no reordering across line bounds is
   943		// performed, so this is sufficient.
   944		highestLevel := level(0)
   945		lowestOddLevel := level(maxDepth + 2)
   946		for _, level := range levels {
   947			if level > highestLevel {
   948				highestLevel = level
   949			}
   950			if level&1 != 0 && level < lowestOddLevel {
   951				lowestOddLevel = level
   952			}
   953		}
   954	
   955		for level := highestLevel; level >= lowestOddLevel; level-- {
   956			for i := 0; i < len(levels); i++ {
   957				if levels[i] >= level {
   958					// find range of text at or above this level
   959					start := i
   960					limit := i + 1
   961					for limit < len(levels) && levels[limit] >= level {
   962						limit++
   963					}
   964	
   965					for j, k := start, limit-1; j < k; j, k = j+1, k-1 {
   966						result[j], result[k] = result[k], result[j]
   967					}
   968					// skip to end of level run
   969					i = limit
   970				}
   971			}
   972		}
   973	
   974		return result
   975	}
   976	
   977	// isWhitespace reports whether the type is considered a whitespace type for the
   978	// line break rules.
   979	func isWhitespace(c Class) bool {
   980		switch c {
   981		case LRE, RLE, LRO, RLO, PDF, LRI, RLI, FSI, PDI, BN, WS:
   982			return true
   983		}
   984		return false
   985	}
   986	
   987	// isRemovedByX9 reports whether the type is one of the types removed in X9.
   988	func isRemovedByX9(c Class) bool {
   989		switch c {
   990		case LRE, RLE, LRO, RLO, PDF, BN:
   991			return true
   992		}
   993		return false
   994	}
   995	
   996	// typeForLevel reports the strong type (L or R) corresponding to the level.
   997	func typeForLevel(level level) Class {
   998		if (level & 0x1) == 0 {
   999			return L
  1000		}
  1001		return R
  1002	}
  1003	
  1004	// TODO: change validation to not panic
  1005	
  1006	func validateTypes(types []Class) {
  1007		if len(types) == 0 {
  1008			log.Panic("types is null")
  1009		}
  1010		for i, t := range types[:len(types)-1] {
  1011			if t == B {
  1012				log.Panicf("B type before end of paragraph at index: %d", i)
  1013			}
  1014		}
  1015	}
  1016	
  1017	func validateParagraphEmbeddingLevel(embeddingLevel level) {
  1018		if embeddingLevel != implicitLevel &&
  1019			embeddingLevel != 0 &&
  1020			embeddingLevel != 1 {
  1021			log.Panicf("illegal paragraph embedding level: %d", embeddingLevel)
  1022		}
  1023	}
  1024	
  1025	func validateLineBreaks(linebreaks []int, textLength int) {
  1026		prev := 0
  1027		for i, next := range linebreaks {
  1028			if next <= prev {
  1029				log.Panicf("bad linebreak: %d at index: %d", next, i)
  1030			}
  1031			prev = next
  1032		}
  1033		if prev != textLength {
  1034			log.Panicf("last linebreak was %d, want %d", prev, textLength)
  1035		}
  1036	}
  1037	
  1038	func validatePbTypes(pairTypes []bracketType) {
  1039		if len(pairTypes) == 0 {
  1040			log.Panic("pairTypes is null")
  1041		}
  1042		for i, pt := range pairTypes {
  1043			switch pt {
  1044			case bpNone, bpOpen, bpClose:
  1045			default:
  1046				log.Panicf("illegal pairType value at %d: %v", i, pairTypes[i])
  1047			}
  1048		}
  1049	}
  1050	
  1051	func validatePbValues(pairValues []rune, pairTypes []bracketType) {
  1052		if pairValues == nil {
  1053			log.Panic("pairValues is null")
  1054		}
  1055		if len(pairTypes) != len(pairValues) {
  1056			log.Panic("pairTypes is different length from pairValues")
  1057		}
  1058	}
  1059
View as plain text