From 760d88497091fb5d6d231a18e6f4e06ecb9af9b2 Mon Sep 17 00:00:00 2001 From: Russ Cox Date: Thu, 10 Sep 2020 18:53:26 -0400 Subject: [PATCH 4/6] text/template: allow newlines inside action delimiters This allows multiline constructs like: {{"hello" | printf}} Now that unclosed actions can span multiple lines, track and report the start of the action when reporting errors. Also clean up a few "unexpected " to be just "". Fixes #29770. Change-Id: I54c6c016029a8328b7902a4b6d85eab713ec3285 Reviewed-on: https://go-review.googlesource.com/c/go/+/254257 Trust: Russ Cox Run-TryBot: Russ Cox TryBot-Result: Go Bot Reviewed-by: Rob Pike Dependency Patch #4 Upstream-Status: Backport from https://github.com/golang/go/commit/9384d34c58099657bb1b133beaf3ff37ada9b017 CVE: CVE-2023-24538 Signed-off-by: Shubham Kulkarni --- src/text/template/doc.go | 21 ++++----- src/text/template/exec_test.go | 2 +- src/text/template/parse/lex.go | 84 +++++++++++++++++------------------ src/text/template/parse/lex_test.go | 2 +- src/text/template/parse/parse.go | 59 +++++++++++++----------- src/text/template/parse/parse_test.go | 36 ++++++++++++--- 6 files changed, 117 insertions(+), 87 deletions(-) diff --git a/src/text/template/doc.go b/src/text/template/doc.go index 4b0efd2..7b30294 100644 --- a/src/text/template/doc.go +++ b/src/text/template/doc.go @@ -40,16 +40,17 @@ More intricate examples appear below. Text and spaces By default, all text between actions is copied verbatim when the template is -executed. For example, the string " items are made of " in the example above appears -on standard output when the program is run. - -However, to aid in formatting template source code, if an action's left delimiter -(by default "{{") is followed immediately by a minus sign and ASCII space character -("{{- "), all trailing white space is trimmed from the immediately preceding text. -Similarly, if the right delimiter ("}}") is preceded by a space and minus sign -(" -}}"), all leading white space is trimmed from the immediately following text. -In these trim markers, the ASCII space must be present; "{{-3}}" parses as an -action containing the number -3. +executed. For example, the string " items are made of " in the example above +appears on standard output when the program is run. + +However, to aid in formatting template source code, if an action's left +delimiter (by default "{{") is followed immediately by a minus sign and white +space, all trailing white space is trimmed from the immediately preceding text. +Similarly, if the right delimiter ("}}") is preceded by white space and a minus +sign, all leading white space is trimmed from the immediately following text. +In these trim markers, the white space must be present: +"{{- 3}}" is like "{{3}}" but trims the immediately preceding text, while +"{{-3}}" parses as an action containing the number -3. For instance, when executing the template whose source is diff --git a/src/text/template/exec_test.go b/src/text/template/exec_test.go index b8a809e..3309b33 100644 --- a/src/text/template/exec_test.go +++ b/src/text/template/exec_test.go @@ -1295,7 +1295,7 @@ func TestUnterminatedStringError(t *testing.T) { t.Fatal("expected error") } str := err.Error() - if !strings.Contains(str, "X:3: unexpected unterminated raw quoted string") { + if !strings.Contains(str, "X:3: unterminated raw quoted string") { t.Fatalf("unexpected error: %s", str) } } diff --git a/src/text/template/parse/lex.go b/src/text/template/parse/lex.go index e41373a..6784071 100644 --- a/src/text/template/parse/lex.go +++ b/src/text/template/parse/lex.go @@ -92,15 +92,14 @@ const eof = -1 // If the action begins "{{- " rather than "{{", then all space/tab/newlines // preceding the action are trimmed; conversely if it ends " -}}" the // leading spaces are trimmed. This is done entirely in the lexer; the -// parser never sees it happen. We require an ASCII space to be -// present to avoid ambiguity with things like "{{-3}}". It reads +// parser never sees it happen. We require an ASCII space (' ', \t, \r, \n) +// to be present to avoid ambiguity with things like "{{-3}}". It reads // better with the space present anyway. For simplicity, only ASCII -// space does the job. +// does the job. const ( - spaceChars = " \t\r\n" // These are the space characters defined by Go itself. - leftTrimMarker = "- " // Attached to left delimiter, trims trailing spaces from preceding text. - rightTrimMarker = " -" // Attached to right delimiter, trims leading spaces from following text. - trimMarkerLen = Pos(len(leftTrimMarker)) + spaceChars = " \t\r\n" // These are the space characters defined by Go itself. + trimMarker = '-' // Attached to left/right delimiter, trims trailing spaces from preceding/following text. + trimMarkerLen = Pos(1 + 1) // marker plus space before or after ) // stateFn represents the state of the scanner as a function that returns the next state. @@ -108,19 +107,18 @@ type stateFn func(*lexer) stateFn // lexer holds the state of the scanner. type lexer struct { - name string // the name of the input; used only for error reports - input string // the string being scanned - leftDelim string // start of action - rightDelim string // end of action - trimRightDelim string // end of action with trim marker - emitComment bool // emit itemComment tokens. - pos Pos // current position in the input - start Pos // start position of this item - width Pos // width of last rune read from input - items chan item // channel of scanned items - parenDepth int // nesting depth of ( ) exprs - line int // 1+number of newlines seen - startLine int // start line of this item + name string // the name of the input; used only for error reports + input string // the string being scanned + leftDelim string // start of action + rightDelim string // end of action + emitComment bool // emit itemComment tokens. + pos Pos // current position in the input + start Pos // start position of this item + width Pos // width of last rune read from input + items chan item // channel of scanned items + parenDepth int // nesting depth of ( ) exprs + line int // 1+number of newlines seen + startLine int // start line of this item } // next returns the next rune in the input. @@ -213,15 +211,14 @@ func lex(name, input, left, right string, emitComment bool) *lexer { right = rightDelim } l := &lexer{ - name: name, - input: input, - leftDelim: left, - rightDelim: right, - trimRightDelim: rightTrimMarker + right, - emitComment: emitComment, - items: make(chan item), - line: 1, - startLine: 1, + name: name, + input: input, + leftDelim: left, + rightDelim: right, + emitComment: emitComment, + items: make(chan item), + line: 1, + startLine: 1, } go l.run() return l @@ -251,7 +248,7 @@ func lexText(l *lexer) stateFn { ldn := Pos(len(l.leftDelim)) l.pos += Pos(x) trimLength := Pos(0) - if strings.HasPrefix(l.input[l.pos+ldn:], leftTrimMarker) { + if hasLeftTrimMarker(l.input[l.pos+ldn:]) { trimLength = rightTrimLength(l.input[l.start:l.pos]) } l.pos -= trimLength @@ -280,7 +277,7 @@ func rightTrimLength(s string) Pos { // atRightDelim reports whether the lexer is at a right delimiter, possibly preceded by a trim marker. func (l *lexer) atRightDelim() (delim, trimSpaces bool) { - if strings.HasPrefix(l.input[l.pos:], l.trimRightDelim) { // With trim marker. + if hasRightTrimMarker(l.input[l.pos:]) && strings.HasPrefix(l.input[l.pos+trimMarkerLen:], l.rightDelim) { // With trim marker. return true, true } if strings.HasPrefix(l.input[l.pos:], l.rightDelim) { // Without trim marker. @@ -297,7 +294,7 @@ func leftTrimLength(s string) Pos { // lexLeftDelim scans the left delimiter, which is known to be present, possibly with a trim marker. func lexLeftDelim(l *lexer) stateFn { l.pos += Pos(len(l.leftDelim)) - trimSpace := strings.HasPrefix(l.input[l.pos:], leftTrimMarker) + trimSpace := hasLeftTrimMarker(l.input[l.pos:]) afterMarker := Pos(0) if trimSpace { afterMarker = trimMarkerLen @@ -342,7 +339,7 @@ func lexComment(l *lexer) stateFn { // lexRightDelim scans the right delimiter, which is known to be present, possibly with a trim marker. func lexRightDelim(l *lexer) stateFn { - trimSpace := strings.HasPrefix(l.input[l.pos:], rightTrimMarker) + trimSpace := hasRightTrimMarker(l.input[l.pos:]) if trimSpace { l.pos += trimMarkerLen l.ignore() @@ -369,7 +366,7 @@ func lexInsideAction(l *lexer) stateFn { return l.errorf("unclosed left paren") } switch r := l.next(); { - case r == eof || isEndOfLine(r): + case r == eof: return l.errorf("unclosed action") case isSpace(r): l.backup() // Put space back in case we have " -}}". @@ -439,7 +436,7 @@ func lexSpace(l *lexer) stateFn { } // Be careful about a trim-marked closing delimiter, which has a minus // after a space. We know there is a space, so check for the '-' that might follow. - if strings.HasPrefix(l.input[l.pos-1:], l.trimRightDelim) { + if hasRightTrimMarker(l.input[l.pos-1:]) && strings.HasPrefix(l.input[l.pos-1+trimMarkerLen:], l.rightDelim) { l.backup() // Before the space. if numSpaces == 1 { return lexRightDelim // On the delim, so go right to that. @@ -526,7 +523,7 @@ func lexFieldOrVariable(l *lexer, typ itemType) stateFn { // day to implement arithmetic. func (l *lexer) atTerminator() bool { r := l.peek() - if isSpace(r) || isEndOfLine(r) { + if isSpace(r) { return true } switch r { @@ -657,15 +654,18 @@ Loop: // isSpace reports whether r is a space character. func isSpace(r rune) bool { - return r == ' ' || r == '\t' -} - -// isEndOfLine reports whether r is an end-of-line character. -func isEndOfLine(r rune) bool { - return r == '\r' || r == '\n' + return r == ' ' || r == '\t' || r == '\r' || r == '\n' } // isAlphaNumeric reports whether r is an alphabetic, digit, or underscore. func isAlphaNumeric(r rune) bool { return r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r) } + +func hasLeftTrimMarker(s string) bool { + return len(s) >= 2 && s[0] == trimMarker && isSpace(rune(s[1])) +} + +func hasRightTrimMarker(s string) bool { + return len(s) >= 2 && isSpace(rune(s[0])) && s[1] == trimMarker +} diff --git a/src/text/template/parse/lex_test.go b/src/text/template/parse/lex_test.go index f6d5f28..6510eed 100644 --- a/src/text/template/parse/lex_test.go +++ b/src/text/template/parse/lex_test.go @@ -323,7 +323,7 @@ var lexTests = []lexTest{ tLeft, mkItem(itemError, "unrecognized character in action: U+0001"), }}, - {"unclosed action", "{{\n}}", []item{ + {"unclosed action", "{{", []item{ tLeft, mkItem(itemError, "unclosed action"), }}, diff --git a/src/text/template/parse/parse.go b/src/text/template/parse/parse.go index 496d8bf..5e6e512 100644 --- a/src/text/template/parse/parse.go +++ b/src/text/template/parse/parse.go @@ -24,13 +24,14 @@ type Tree struct { Mode Mode // parsing mode. text string // text parsed to create the template (or its parent) // Parsing only; cleared after parse. - funcs []map[string]interface{} - lex *lexer - token [3]item // three-token lookahead for parser. - peekCount int - vars []string // variables defined at the moment. - treeSet map[string]*Tree - mode Mode + funcs []map[string]interface{} + lex *lexer + token [3]item // three-token lookahead for parser. + peekCount int + vars []string // variables defined at the moment. + treeSet map[string]*Tree + actionLine int // line of left delim starting action + mode Mode } // A mode value is a set of flags (or 0). Modes control parser behavior. @@ -187,6 +188,16 @@ func (t *Tree) expectOneOf(expected1, expected2 itemType, context string) item { // unexpected complains about the token and terminates processing. func (t *Tree) unexpected(token item, context string) { + if token.typ == itemError { + extra := "" + if t.actionLine != 0 && t.actionLine != token.line { + extra = fmt.Sprintf(" in action started at %s:%d", t.ParseName, t.actionLine) + if strings.HasSuffix(token.val, " action") { + extra = extra[len(" in action"):] // avoid "action in action" + } + } + t.errorf("%s%s", token, extra) + } t.errorf("unexpected %s in %s", token, context) } @@ -350,6 +361,8 @@ func (t *Tree) textOrAction() Node { case itemText: return t.newText(token.pos, token.val) case itemLeftDelim: + t.actionLine = token.line + defer t.clearActionLine() return t.action() case itemComment: return t.newComment(token.pos, token.val) @@ -359,6 +372,10 @@ func (t *Tree) textOrAction() Node { return nil } +func (t *Tree) clearActionLine() { + t.actionLine = 0 +} + // Action: // control // command ("|" command)* @@ -384,12 +401,12 @@ func (t *Tree) action() (n Node) { t.backup() token := t.peek() // Do not pop variables; they persist until "end". - return t.newAction(token.pos, token.line, t.pipeline("command")) + return t.newAction(token.pos, token.line, t.pipeline("command", itemRightDelim)) } // Pipeline: // declarations? command ('|' command)* -func (t *Tree) pipeline(context string) (pipe *PipeNode) { +func (t *Tree) pipeline(context string, end itemType) (pipe *PipeNode) { token := t.peekNonSpace() pipe = t.newPipeline(token.pos, token.line, nil) // Are there declarations or assignments? @@ -430,12 +447,9 @@ decls: } for { switch token := t.nextNonSpace(); token.typ { - case itemRightDelim, itemRightParen: + case end: // At this point, the pipeline is complete t.checkPipeline(pipe, context) - if token.typ == itemRightParen { - t.backup() - } return case itemBool, itemCharConstant, itemComplex, itemDot, itemField, itemIdentifier, itemNumber, itemNil, itemRawString, itemString, itemVariable, itemLeftParen: @@ -464,7 +478,7 @@ func (t *Tree) checkPipeline(pipe *PipeNode, context string) { func (t *Tree) parseControl(allowElseIf bool, context string) (pos Pos, line int, pipe *PipeNode, list, elseList *ListNode) { defer t.popVars(len(t.vars)) - pipe = t.pipeline(context) + pipe = t.pipeline(context, itemRightDelim) var next Node list, next = t.itemList() switch next.Type() { @@ -550,7 +564,7 @@ func (t *Tree) blockControl() Node { token := t.nextNonSpace() name := t.parseTemplateName(token, context) - pipe := t.pipeline(context) + pipe := t.pipeline(context, itemRightDelim) block := New(name) // name will be updated once we know it. block.text = t.text @@ -580,7 +594,7 @@ func (t *Tree) templateControl() Node { if t.nextNonSpace().typ != itemRightDelim { t.backup() // Do not pop variables; they persist until "end". - pipe = t.pipeline(context) + pipe = t.pipeline(context, itemRightDelim) } return t.newTemplate(token.pos, token.line, name, pipe) } @@ -614,13 +628,12 @@ func (t *Tree) command() *CommandNode { switch token := t.next(); token.typ { case itemSpace: continue - case itemError: - t.errorf("%s", token.val) case itemRightDelim, itemRightParen: t.backup() case itemPipe: + // nothing here; break loop below default: - t.errorf("unexpected %s in operand", token) + t.unexpected(token, "operand") } break } @@ -675,8 +688,6 @@ func (t *Tree) operand() Node { // A nil return means the next item is not a term. func (t *Tree) term() Node { switch token := t.nextNonSpace(); token.typ { - case itemError: - t.errorf("%s", token.val) case itemIdentifier: if !t.hasFunction(token.val) { t.errorf("function %q not defined", token.val) @@ -699,11 +710,7 @@ func (t *Tree) term() Node { } return number case itemLeftParen: - pipe := t.pipeline("parenthesized pipeline") - if token := t.next(); token.typ != itemRightParen { - t.errorf("unclosed right paren: unexpected %s", token) - } - return pipe + return t.pipeline("parenthesized pipeline", itemRightParen) case itemString, itemRawString: s, err := strconv.Unquote(token.val) if err != nil { diff --git a/src/text/template/parse/parse_test.go b/src/text/template/parse/parse_test.go index d9c13c5..220f984 100644 --- a/src/text/template/parse/parse_test.go +++ b/src/text/template/parse/parse_test.go @@ -250,6 +250,13 @@ var parseTests = []parseTest{ {"comment trim left and right", "x \r\n\t{{- /* */ -}}\n\n\ty", noError, `"x""y"`}, {"block definition", `{{block "foo" .}}hello{{end}}`, noError, `{{template "foo" .}}`}, + + {"newline in assignment", "{{ $x \n := \n 1 \n }}", noError, "{{$x := 1}}"}, + {"newline in empty action", "{{\n}}", hasError, "{{\n}}"}, + {"newline in pipeline", "{{\n\"x\"\n|\nprintf\n}}", noError, `{{"x" | printf}}`}, + {"newline in comment", "{{/*\nhello\n*/}}", noError, ""}, + {"newline in comment", "{{-\n/*\nhello\n*/\n-}}", noError, ""}, + // Errors. {"unclosed action", "hello{{range", hasError, ""}, {"unmatched end", "{{end}}", hasError, ""}, @@ -426,23 +433,38 @@ var errorTests = []parseTest{ // Check line numbers are accurate. {"unclosed1", "line1\n{{", - hasError, `unclosed1:2: unexpected unclosed action in command`}, + hasError, `unclosed1:2: unclosed action`}, {"unclosed2", "line1\n{{define `x`}}line2\n{{", - hasError, `unclosed2:3: unexpected unclosed action in command`}, + hasError, `unclosed2:3: unclosed action`}, + {"unclosed3", + "line1\n{{\"x\"\n\"y\"\n", + hasError, `unclosed3:4: unclosed action started at unclosed3:2`}, + {"unclosed4", + "{{\n\n\n\n\n", + hasError, `unclosed4:6: unclosed action started at unclosed4:1`}, + {"var1", + "line1\n{{\nx\n}}", + hasError, `var1:3: function "x" not defined`}, // Specific errors. {"function", "{{foo}}", hasError, `function "foo" not defined`}, - {"comment", + {"comment1", "{{/*}}", - hasError, `unclosed comment`}, + hasError, `comment1:1: unclosed comment`}, + {"comment2", + "{{/*\nhello\n}}", + hasError, `comment2:1: unclosed comment`}, {"lparen", "{{.X (1 2 3}}", hasError, `unclosed left paren`}, {"rparen", - "{{.X 1 2 3)}}", - hasError, `unexpected ")"`}, + "{{.X 1 2 3 ) }}", + hasError, `unexpected ")" in command`}, + {"rparen2", + "{{(.X 1 2 3", + hasError, `unclosed action`}, {"space", "{{`x`3}}", hasError, `in operand`}, @@ -488,7 +510,7 @@ var errorTests = []parseTest{ hasError, `missing value for parenthesized pipeline`}, {"multilinerawstring", "{{ $v := `\n` }} {{", - hasError, `multilinerawstring:2: unexpected unclosed action`}, + hasError, `multilinerawstring:2: unclosed action`}, {"rangeundefvar", "{{range $k}}{{end}}", hasError, `undefined variable`}, -- 2.7.4