Source file src/text/template/parse/lex.go
1
2
3
4
5 package parse
6
7 import (
8 "fmt"
9 "strings"
10 "unicode"
11 "unicode/utf8"
12 )
13
14
15 type item struct {
16 typ itemType
17 pos Pos
18 val string
19 line int
20 }
21
22 func (i item) String() string {
23 switch {
24 case i.typ == itemEOF:
25 return "EOF"
26 case i.typ == itemError:
27 return i.val
28 case i.typ > itemKeyword:
29 return fmt.Sprintf("<%s>", i.val)
30 case len(i.val) > 10:
31 return fmt.Sprintf("%.10q...", i.val)
32 }
33 return fmt.Sprintf("%q", i.val)
34 }
35
36
// itemType identifies the kind of a lexed item.
type itemType int

// The item kinds. The declaration order is significant: everything declared
// after itemKeyword is treated as a keyword by comparisons of the form
// "typ > itemKeyword".
const (
	itemError        itemType = iota // lexing failed; the item value holds the message
	itemBool                         // the words "true" and "false"
	itemChar                         // a printable ASCII character with no dedicated kind
	itemCharConstant                 // single-quoted character constant
	itemComplex                      // number followed by a signed imaginary part
	itemAssign                       // '='
	itemDeclare                      // ':='
	itemEOF                          // end of input
	itemField                        // '.' followed by an alphanumeric name
	itemIdentifier                   // alphanumeric word that is not a keyword or boolean
	itemLeftDelim                    // left action delimiter
	itemLeftParen                    // '(' inside an action
	itemNumber                       // number, possibly imaginary
	itemPipe                         // '|'
	itemRawString                    // back-quoted string, quotes included
	itemRightDelim                   // right action delimiter
	itemRightParen                   // ')' inside an action
	itemSpace                        // run of spaces and tabs inside an action
	itemString                       // double-quoted string, quotes included
	itemText                         // text outside of actions
	itemVariable                     // '$' optionally followed by an alphanumeric name

	itemKeyword  // sentinel only: separates keywords from everything above
	itemBlock    // "block"
	itemDot      // "."
	itemDefine   // "define"
	itemElse     // "else"
	itemEnd      // "end"
	itemIf       // "if"
	itemNil      // "nil"
	itemRange    // "range"
	itemTemplate // "template"
	itemWith     // "with"
)
74
75 var key = map[string]itemType{
76 ".": itemDot,
77 "block": itemBlock,
78 "define": itemDefine,
79 "else": itemElse,
80 "end": itemEnd,
81 "if": itemIf,
82 "range": itemRange,
83 "nil": itemNil,
84 "template": itemTemplate,
85 "with": itemWith,
86 }
87
// eof is the sentinel returned by next once the input is exhausted. It is
// negative, so it can never collide with a decoded rune.
const eof = -1
89
90
91
92
93
94
95
96
97
98 const (
99 spaceChars = " \t\r\n"
100 leftTrimMarker = "- "
101 rightTrimMarker = " -"
102 trimMarkerLen = Pos(len(leftTrimMarker))
103 )
104
105
106 type stateFn func(*lexer) stateFn
107
108
109 type lexer struct {
110 name string
111 input string
112 leftDelim string
113 rightDelim string
114 trimRightDelim string
115 pos Pos
116 start Pos
117 width Pos
118 items chan item
119 parenDepth int
120 line int
121 startLine int
122 }
123
124
125 func (l *lexer) next() rune {
126 if int(l.pos) >= len(l.input) {
127 l.width = 0
128 return eof
129 }
130 r, w := utf8.DecodeRuneInString(l.input[l.pos:])
131 l.width = Pos(w)
132 l.pos += l.width
133 if r == '\n' {
134 l.line++
135 }
136 return r
137 }
138
139
140 func (l *lexer) peek() rune {
141 r := l.next()
142 l.backup()
143 return r
144 }
145
146
147 func (l *lexer) backup() {
148 l.pos -= l.width
149
150 if l.width == 1 && l.input[l.pos] == '\n' {
151 l.line--
152 }
153 }
154
155
156 func (l *lexer) emit(t itemType) {
157 l.items <- item{t, l.start, l.input[l.start:l.pos], l.startLine}
158 l.start = l.pos
159 l.startLine = l.line
160 }
161
162
163 func (l *lexer) ignore() {
164 l.line += strings.Count(l.input[l.start:l.pos], "\n")
165 l.start = l.pos
166 l.startLine = l.line
167 }
168
169
170 func (l *lexer) accept(valid string) bool {
171 if strings.ContainsRune(valid, l.next()) {
172 return true
173 }
174 l.backup()
175 return false
176 }
177
178
179 func (l *lexer) acceptRun(valid string) {
180 for strings.ContainsRune(valid, l.next()) {
181 }
182 l.backup()
183 }
184
185
186
187 func (l *lexer) errorf(format string, args ...interface{}) stateFn {
188 l.items <- item{itemError, l.start, fmt.Sprintf(format, args...), l.startLine}
189 return nil
190 }
191
192
193
194 func (l *lexer) nextItem() item {
195 return <-l.items
196 }
197
198
199
200 func (l *lexer) drain() {
201 for range l.items {
202 }
203 }
204
205
206 func lex(name, input, left, right string) *lexer {
207 if left == "" {
208 left = leftDelim
209 }
210 if right == "" {
211 right = rightDelim
212 }
213 l := &lexer{
214 name: name,
215 input: input,
216 leftDelim: left,
217 rightDelim: right,
218 trimRightDelim: rightTrimMarker + right,
219 items: make(chan item),
220 line: 1,
221 startLine: 1,
222 }
223 go l.run()
224 return l
225 }
226
227
228 func (l *lexer) run() {
229 for state := lexText; state != nil; {
230 state = state(l)
231 }
232 close(l.items)
233 }
234
235
236
// Default action delimiters and the comment markers recognised inside an
// action.
const (
	leftDelim    = "{{" // default opening delimiter, used when lex is given ""
	rightDelim   = "}}" // default closing delimiter, used when lex is given ""
	leftComment  = "/*" // opens a comment
	rightComment = "*/" // closes a comment
)
243
244
245 func lexText(l *lexer) stateFn {
246 l.width = 0
247 if x := strings.Index(l.input[l.pos:], l.leftDelim); x >= 0 {
248 ldn := Pos(len(l.leftDelim))
249 l.pos += Pos(x)
250 trimLength := Pos(0)
251 if strings.HasPrefix(l.input[l.pos+ldn:], leftTrimMarker) {
252 trimLength = rightTrimLength(l.input[l.start:l.pos])
253 }
254 l.pos -= trimLength
255 if l.pos > l.start {
256 l.line += strings.Count(l.input[l.start:l.pos], "\n")
257 l.emit(itemText)
258 }
259 l.pos += trimLength
260 l.ignore()
261 return lexLeftDelim
262 }
263 l.pos = Pos(len(l.input))
264
265 if l.pos > l.start {
266 l.line += strings.Count(l.input[l.start:l.pos], "\n")
267 l.emit(itemText)
268 }
269 l.emit(itemEOF)
270 return nil
271 }
272
273
274 func rightTrimLength(s string) Pos {
275 return Pos(len(s) - len(strings.TrimRight(s, spaceChars)))
276 }
277
278
279 func (l *lexer) atRightDelim() (delim, trimSpaces bool) {
280 if strings.HasPrefix(l.input[l.pos:], l.trimRightDelim) {
281 return true, true
282 }
283 if strings.HasPrefix(l.input[l.pos:], l.rightDelim) {
284 return true, false
285 }
286 return false, false
287 }
288
289
290 func leftTrimLength(s string) Pos {
291 return Pos(len(s) - len(strings.TrimLeft(s, spaceChars)))
292 }
293
294
295 func lexLeftDelim(l *lexer) stateFn {
296 l.pos += Pos(len(l.leftDelim))
297 trimSpace := strings.HasPrefix(l.input[l.pos:], leftTrimMarker)
298 afterMarker := Pos(0)
299 if trimSpace {
300 afterMarker = trimMarkerLen
301 }
302 if strings.HasPrefix(l.input[l.pos+afterMarker:], leftComment) {
303 l.pos += afterMarker
304 l.ignore()
305 return lexComment
306 }
307 l.emit(itemLeftDelim)
308 l.pos += afterMarker
309 l.ignore()
310 l.parenDepth = 0
311 return lexInsideAction
312 }
313
314
315 func lexComment(l *lexer) stateFn {
316 l.pos += Pos(len(leftComment))
317 i := strings.Index(l.input[l.pos:], rightComment)
318 if i < 0 {
319 return l.errorf("unclosed comment")
320 }
321 l.pos += Pos(i + len(rightComment))
322 delim, trimSpace := l.atRightDelim()
323 if !delim {
324 return l.errorf("comment ends before closing delimiter")
325 }
326 if trimSpace {
327 l.pos += trimMarkerLen
328 }
329 l.pos += Pos(len(l.rightDelim))
330 if trimSpace {
331 l.pos += leftTrimLength(l.input[l.pos:])
332 }
333 l.ignore()
334 return lexText
335 }
336
337
338 func lexRightDelim(l *lexer) stateFn {
339 trimSpace := strings.HasPrefix(l.input[l.pos:], rightTrimMarker)
340 if trimSpace {
341 l.pos += trimMarkerLen
342 l.ignore()
343 }
344 l.pos += Pos(len(l.rightDelim))
345 l.emit(itemRightDelim)
346 if trimSpace {
347 l.pos += leftTrimLength(l.input[l.pos:])
348 l.ignore()
349 }
350 return lexText
351 }
352
353
354 func lexInsideAction(l *lexer) stateFn {
355
356
357
358 delim, _ := l.atRightDelim()
359 if delim {
360 if l.parenDepth == 0 {
361 return lexRightDelim
362 }
363 return l.errorf("unclosed left paren")
364 }
365 switch r := l.next(); {
366 case r == eof || isEndOfLine(r):
367 return l.errorf("unclosed action")
368 case isSpace(r):
369 l.backup()
370 return lexSpace
371 case r == '=':
372 l.emit(itemAssign)
373 case r == ':':
374 if l.next() != '=' {
375 return l.errorf("expected :=")
376 }
377 l.emit(itemDeclare)
378 case r == '|':
379 l.emit(itemPipe)
380 case r == '"':
381 return lexQuote
382 case r == '`':
383 return lexRawQuote
384 case r == '$':
385 return lexVariable
386 case r == '\'':
387 return lexChar
388 case r == '.':
389
390 if l.pos < Pos(len(l.input)) {
391 r := l.input[l.pos]
392 if r < '0' || '9' < r {
393 return lexField
394 }
395 }
396 fallthrough
397 case r == '+' || r == '-' || ('0' <= r && r <= '9'):
398 l.backup()
399 return lexNumber
400 case isAlphaNumeric(r):
401 l.backup()
402 return lexIdentifier
403 case r == '(':
404 l.emit(itemLeftParen)
405 l.parenDepth++
406 case r == ')':
407 l.emit(itemRightParen)
408 l.parenDepth--
409 if l.parenDepth < 0 {
410 return l.errorf("unexpected right paren %#U", r)
411 }
412 case r <= unicode.MaxASCII && unicode.IsPrint(r):
413 l.emit(itemChar)
414 return lexInsideAction
415 default:
416 return l.errorf("unrecognized character in action: %#U", r)
417 }
418 return lexInsideAction
419 }
420
421
422
423
424 func lexSpace(l *lexer) stateFn {
425 var r rune
426 var numSpaces int
427 for {
428 r = l.peek()
429 if !isSpace(r) {
430 break
431 }
432 l.next()
433 numSpaces++
434 }
435
436
437 if strings.HasPrefix(l.input[l.pos-1:], l.trimRightDelim) {
438 l.backup()
439 if numSpaces == 1 {
440 return lexRightDelim
441 }
442 }
443 l.emit(itemSpace)
444 return lexInsideAction
445 }
446
447
448 func lexIdentifier(l *lexer) stateFn {
449 Loop:
450 for {
451 switch r := l.next(); {
452 case isAlphaNumeric(r):
453
454 default:
455 l.backup()
456 word := l.input[l.start:l.pos]
457 if !l.atTerminator() {
458 return l.errorf("bad character %#U", r)
459 }
460 switch {
461 case key[word] > itemKeyword:
462 l.emit(key[word])
463 case word[0] == '.':
464 l.emit(itemField)
465 case word == "true", word == "false":
466 l.emit(itemBool)
467 default:
468 l.emit(itemIdentifier)
469 }
470 break Loop
471 }
472 }
473 return lexInsideAction
474 }
475
476
477
478 func lexField(l *lexer) stateFn {
479 return lexFieldOrVariable(l, itemField)
480 }
481
482
483
484 func lexVariable(l *lexer) stateFn {
485 if l.atTerminator() {
486 l.emit(itemVariable)
487 return lexInsideAction
488 }
489 return lexFieldOrVariable(l, itemVariable)
490 }
491
492
493
494 func lexFieldOrVariable(l *lexer, typ itemType) stateFn {
495 if l.atTerminator() {
496 if typ == itemVariable {
497 l.emit(itemVariable)
498 } else {
499 l.emit(itemDot)
500 }
501 return lexInsideAction
502 }
503 var r rune
504 for {
505 r = l.next()
506 if !isAlphaNumeric(r) {
507 l.backup()
508 break
509 }
510 }
511 if !l.atTerminator() {
512 return l.errorf("bad character %#U", r)
513 }
514 l.emit(typ)
515 return lexInsideAction
516 }
517
518
519
520
521
522 func (l *lexer) atTerminator() bool {
523 r := l.peek()
524 if isSpace(r) || isEndOfLine(r) {
525 return true
526 }
527 switch r {
528 case eof, '.', ',', '|', ':', ')', '(':
529 return true
530 }
531
532
533
534 if rd, _ := utf8.DecodeRuneInString(l.rightDelim); rd == r {
535 return true
536 }
537 return false
538 }
539
540
541
542 func lexChar(l *lexer) stateFn {
543 Loop:
544 for {
545 switch l.next() {
546 case '\\':
547 if r := l.next(); r != eof && r != '\n' {
548 break
549 }
550 fallthrough
551 case eof, '\n':
552 return l.errorf("unterminated character constant")
553 case '\'':
554 break Loop
555 }
556 }
557 l.emit(itemCharConstant)
558 return lexInsideAction
559 }
560
561
562
563
564
565 func lexNumber(l *lexer) stateFn {
566 if !l.scanNumber() {
567 return l.errorf("bad number syntax: %q", l.input[l.start:l.pos])
568 }
569 if sign := l.peek(); sign == '+' || sign == '-' {
570
571 if !l.scanNumber() || l.input[l.pos-1] != 'i' {
572 return l.errorf("bad number syntax: %q", l.input[l.start:l.pos])
573 }
574 l.emit(itemComplex)
575 } else {
576 l.emit(itemNumber)
577 }
578 return lexInsideAction
579 }
580
581 func (l *lexer) scanNumber() bool {
582
583 l.accept("+-")
584
585 digits := "0123456789_"
586 if l.accept("0") {
587
588 if l.accept("xX") {
589 digits = "0123456789abcdefABCDEF_"
590 } else if l.accept("oO") {
591 digits = "01234567_"
592 } else if l.accept("bB") {
593 digits = "01_"
594 }
595 }
596 l.acceptRun(digits)
597 if l.accept(".") {
598 l.acceptRun(digits)
599 }
600 if len(digits) == 10+1 && l.accept("eE") {
601 l.accept("+-")
602 l.acceptRun("0123456789_")
603 }
604 if len(digits) == 16+6+1 && l.accept("pP") {
605 l.accept("+-")
606 l.acceptRun("0123456789_")
607 }
608
609 l.accept("i")
610
611 if isAlphaNumeric(l.peek()) {
612 l.next()
613 return false
614 }
615 return true
616 }
617
618
619 func lexQuote(l *lexer) stateFn {
620 Loop:
621 for {
622 switch l.next() {
623 case '\\':
624 if r := l.next(); r != eof && r != '\n' {
625 break
626 }
627 fallthrough
628 case eof, '\n':
629 return l.errorf("unterminated quoted string")
630 case '"':
631 break Loop
632 }
633 }
634 l.emit(itemString)
635 return lexInsideAction
636 }
637
638
639 func lexRawQuote(l *lexer) stateFn {
640 Loop:
641 for {
642 switch l.next() {
643 case eof:
644 return l.errorf("unterminated raw quoted string")
645 case '`':
646 break Loop
647 }
648 }
649 l.emit(itemRawString)
650 return lexInsideAction
651 }
652
653
// isSpace reports whether r is a space or a tab. Line terminators are not
// spaces; see isEndOfLine.
func isSpace(r rune) bool {
	switch r {
	case ' ', '\t':
		return true
	}
	return false
}
657
658
// isEndOfLine reports whether r is a carriage return or a line feed.
func isEndOfLine(r rune) bool {
	switch r {
	case '\r', '\n':
		return true
	}
	return false
}
662
663
// isAlphaNumeric reports whether r is an underscore, a Unicode letter or a
// Unicode digit.
func isAlphaNumeric(r rune) bool {
	if r == '_' {
		return true
	}
	return unicode.IsLetter(r) || unicode.IsDigit(r)
}
667
View as plain text