handlebars-go/parser/parser.go

851 lines
18 KiB
Go

// Package parser provides a handlebars syntax analyser. It consumes the tokens provided by the lexer to build an AST.
package parser
import (
"fmt"
"regexp"
"runtime"
"strconv"
"git.reinaldyrafli.com/aldy505/handlebars-go/ast"
"git.reinaldyrafli.com/aldy505/handlebars-go/lexer"
)
// References:
// - https://github.com/wycats/handlebars.js/blob/master/src/handlebars.yy
// - https://github.com/golang/go/blob/master/src/text/template/parse/parse.go
// parser is a syntax analyzer.
type parser struct {
// Lexer
lex *lexer.Lexer
// Root node
root ast.Node
// Tokens parsed but not consumed yet
tokens []*lexer.Token
// All tokens have been retreieved from lexer
lexOver bool
}
var (
rOpenComment = regexp.MustCompile(`^\{\{~?!-?-?`)
rCloseComment = regexp.MustCompile(`-?-?~?\}\}$`)
rOpenAmp = regexp.MustCompile(`^\{\{~?&`)
)
// new instanciates a new parser
func new(input string) *parser {
return &parser{
lex: lexer.Scan(input),
}
}
// Parse analyzes given input and returns the AST root node.
func Parse(input string) (result *ast.Program, err error) {
// recover error
defer errRecover(&err)
parser := new(input)
// parse
result = parser.parseProgram()
// check last token
token := parser.shift()
if token.Kind != lexer.TokenEOF {
// Parsing ended before EOF
errToken(token, "Syntax error")
}
// fix whitespaces
processWhitespaces(result)
// named returned values
return
}
// errRecover recovers parsing panic
func errRecover(errp *error) {
e := recover()
if e != nil {
switch err := e.(type) {
case runtime.Error:
panic(e)
case error:
*errp = err
default:
panic(e)
}
}
}
// errPanic panics
func errPanic(err error, line int) {
panic(fmt.Errorf("Parse error on line %d:\n%s", line, err))
}
// errNode panics with given node infos
func errNode(node ast.Node, msg string) {
errPanic(fmt.Errorf("%s\nNode: %s", msg, node), node.Location().Line)
}
// errNode panics with given Token infos
func errToken(tok *lexer.Token, msg string) {
errPanic(fmt.Errorf("%s\nToken: %s", msg, tok), tok.Line)
}
// errNode panics because of an unexpected Token kind
func errExpected(expect lexer.TokenKind, tok *lexer.Token) {
errPanic(fmt.Errorf("Expecting %s, got: '%s'", expect, tok), tok.Line)
}
// program : statement*
func (p *parser) parseProgram() *ast.Program {
result := ast.NewProgram(p.next().Pos, p.next().Line)
for p.isStatement() {
result.AddStatement(p.parseStatement())
}
return result
}
// statement : mustache | block | rawBlock | partial | content | COMMENT
func (p *parser) parseStatement() ast.Node {
var result ast.Node
tok := p.next()
switch tok.Kind {
case lexer.TokenOpen, lexer.TokenOpenUnescaped:
// mustache
result = p.parseMustache()
case lexer.TokenOpenBlock:
// block
result = p.parseBlock()
case lexer.TokenOpenInverse:
// block
result = p.parseInverse()
case lexer.TokenOpenRawBlock:
// rawBlock
result = p.parseRawBlock()
case lexer.TokenOpenPartial:
// partial
result = p.parsePartial()
case lexer.TokenContent:
// content
result = p.parseContent()
case lexer.TokenComment:
// COMMENT
result = p.parseComment()
}
return result
}
// isStatement returns true if next token starts a statement
func (p *parser) isStatement() bool {
if !p.have(1) {
return false
}
switch p.next().Kind {
case lexer.TokenOpen, lexer.TokenOpenUnescaped, lexer.TokenOpenBlock,
lexer.TokenOpenInverse, lexer.TokenOpenRawBlock, lexer.TokenOpenPartial,
lexer.TokenContent, lexer.TokenComment:
return true
}
return false
}
// content : CONTENT
func (p *parser) parseContent() *ast.ContentStatement {
// CONTENT
tok := p.shift()
if tok.Kind != lexer.TokenContent {
// @todo This check can be removed if content is optional in a raw block
errExpected(lexer.TokenContent, tok)
}
return ast.NewContentStatement(tok.Pos, tok.Line, tok.Val)
}
// COMMENT
func (p *parser) parseComment() *ast.CommentStatement {
// COMMENT
tok := p.shift()
value := rOpenComment.ReplaceAllString(tok.Val, "")
value = rCloseComment.ReplaceAllString(value, "")
result := ast.NewCommentStatement(tok.Pos, tok.Line, value)
result.Strip = ast.NewStripForStr(tok.Val)
return result
}
// param* hash?
func (p *parser) parseExpressionParamsHash() ([]ast.Node, *ast.Hash) {
var params []ast.Node
var hash *ast.Hash
// params*
if p.isParam() {
params = p.parseParams()
}
// hash?
if p.isHashSegment() {
hash = p.parseHash()
}
return params, hash
}
// helperName param* hash?
func (p *parser) parseExpression(tok *lexer.Token) *ast.Expression {
result := ast.NewExpression(tok.Pos, tok.Line)
// helperName
result.Path = p.parseHelperName()
// param* hash?
result.Params, result.Hash = p.parseExpressionParamsHash()
return result
}
// rawBlock : openRawBlock content endRawBlock
// openRawBlock : OPEN_RAW_BLOCK helperName param* hash? CLOSE_RAW_BLOCK
// endRawBlock : OPEN_END_RAW_BLOCK helperName CLOSE_RAW_BLOCK
func (p *parser) parseRawBlock() *ast.BlockStatement {
// OPEN_RAW_BLOCK
tok := p.shift()
result := ast.NewBlockStatement(tok.Pos, tok.Line)
// helperName param* hash?
result.Expression = p.parseExpression(tok)
openName := result.Expression.Canonical()
// CLOSE_RAW_BLOCK
tok = p.shift()
if tok.Kind != lexer.TokenCloseRawBlock {
errExpected(lexer.TokenCloseRawBlock, tok)
}
// content
// @todo Is content mandatory in a raw block ?
content := p.parseContent()
program := ast.NewProgram(tok.Pos, tok.Line)
program.AddStatement(content)
result.Program = program
// OPEN_END_RAW_BLOCK
tok = p.shift()
if tok.Kind != lexer.TokenOpenEndRawBlock {
// should never happen as it is caught by lexer
errExpected(lexer.TokenOpenEndRawBlock, tok)
}
// helperName
endID := p.parseHelperName()
closeName, ok := ast.HelperNameStr(endID)
if !ok {
errNode(endID, "Erroneous closing expression")
}
if openName != closeName {
errNode(endID, fmt.Sprintf("%s doesn't match %s", openName, closeName))
}
// CLOSE_RAW_BLOCK
tok = p.shift()
if tok.Kind != lexer.TokenCloseRawBlock {
errExpected(lexer.TokenCloseRawBlock, tok)
}
return result
}
// block : openBlock program inverseChain? closeBlock
func (p *parser) parseBlock() *ast.BlockStatement {
// openBlock
result, blockParams := p.parseOpenBlock()
// program
program := p.parseProgram()
program.BlockParams = blockParams
result.Program = program
// inverseChain?
if p.isInverseChain() {
result.Inverse = p.parseInverseChain()
}
// closeBlock
p.parseCloseBlock(result)
setBlockInverseStrip(result)
return result
}
// setBlockInverseStrip is called when parsing `block` (openBlock | openInverse) and `inverseChain`
//
// TODO: This was totally cargo culted ! CHECK THAT !
//
// cf. prepareBlock() in:
//
// https://github.com/wycats/handlebars.js/blob/master/lib/handlebars/compiler/helper.js
func setBlockInverseStrip(block *ast.BlockStatement) {
if block.Inverse == nil {
return
}
if block.Inverse.Chained {
b, _ := block.Inverse.Body[0].(*ast.BlockStatement)
b.CloseStrip = block.CloseStrip
}
block.InverseStrip = block.Inverse.Strip
}
// block : openInverse program inverseAndProgram? closeBlock
func (p *parser) parseInverse() *ast.BlockStatement {
// openInverse
result, blockParams := p.parseOpenBlock()
// program
program := p.parseProgram()
program.BlockParams = blockParams
result.Inverse = program
// inverseAndProgram?
if p.isInverse() {
result.Program = p.parseInverseAndProgram()
}
// closeBlock
p.parseCloseBlock(result)
setBlockInverseStrip(result)
return result
}
// helperName param* hash? blockParams?
func (p *parser) parseOpenBlockExpression(tok *lexer.Token) (*ast.BlockStatement, []string) {
var blockParams []string
result := ast.NewBlockStatement(tok.Pos, tok.Line)
// helperName param* hash?
result.Expression = p.parseExpression(tok)
// blockParams?
if p.isBlockParams() {
blockParams = p.parseBlockParams()
}
// named returned values
return result, blockParams
}
// inverseChain : openInverseChain program inverseChain?
//
// | inverseAndProgram
func (p *parser) parseInverseChain() *ast.Program {
if p.isInverse() {
// inverseAndProgram
return p.parseInverseAndProgram()
}
result := ast.NewProgram(p.next().Pos, p.next().Line)
// openInverseChain
block, blockParams := p.parseOpenBlock()
// program
program := p.parseProgram()
program.BlockParams = blockParams
block.Program = program
// inverseChain?
if p.isInverseChain() {
block.Inverse = p.parseInverseChain()
}
setBlockInverseStrip(block)
result.Chained = true
result.AddStatement(block)
return result
}
// Returns true if current token starts an inverse chain
func (p *parser) isInverseChain() bool {
return p.isOpenInverseChain() || p.isInverse()
}
// inverseAndProgram : INVERSE program
func (p *parser) parseInverseAndProgram() *ast.Program {
// INVERSE
tok := p.shift()
// program
result := p.parseProgram()
result.Strip = ast.NewStripForStr(tok.Val)
return result
}
// openBlock : OPEN_BLOCK helperName param* hash? blockParams? CLOSE
// openInverse : OPEN_INVERSE helperName param* hash? blockParams? CLOSE
// openInverseChain: OPEN_INVERSE_CHAIN helperName param* hash? blockParams? CLOSE
func (p *parser) parseOpenBlock() (*ast.BlockStatement, []string) {
// OPEN_BLOCK | OPEN_INVERSE | OPEN_INVERSE_CHAIN
tok := p.shift()
// helperName param* hash? blockParams?
result, blockParams := p.parseOpenBlockExpression(tok)
// CLOSE
tokClose := p.shift()
if tokClose.Kind != lexer.TokenClose {
errExpected(lexer.TokenClose, tokClose)
}
result.OpenStrip = ast.NewStrip(tok.Val, tokClose.Val)
// named returned values
return result, blockParams
}
// closeBlock : OPEN_ENDBLOCK helperName CLOSE
func (p *parser) parseCloseBlock(block *ast.BlockStatement) {
// OPEN_ENDBLOCK
tok := p.shift()
if tok.Kind != lexer.TokenOpenEndBlock {
errExpected(lexer.TokenOpenEndBlock, tok)
}
// helperName
endID := p.parseHelperName()
closeName, ok := ast.HelperNameStr(endID)
if !ok {
errNode(endID, "Erroneous closing expression")
}
openName := block.Expression.Canonical()
if openName != closeName {
errNode(endID, fmt.Sprintf("%s doesn't match %s", openName, closeName))
}
// CLOSE
tokClose := p.shift()
if tokClose.Kind != lexer.TokenClose {
errExpected(lexer.TokenClose, tokClose)
}
block.CloseStrip = ast.NewStrip(tok.Val, tokClose.Val)
}
// mustache : OPEN helperName param* hash? CLOSE
//
// | OPEN_UNESCAPED helperName param* hash? CLOSE_UNESCAPED
func (p *parser) parseMustache() *ast.MustacheStatement {
// OPEN | OPEN_UNESCAPED
tok := p.shift()
closeToken := lexer.TokenClose
if tok.Kind == lexer.TokenOpenUnescaped {
closeToken = lexer.TokenCloseUnescaped
}
unescaped := false
if (tok.Kind == lexer.TokenOpenUnescaped) || (rOpenAmp.MatchString(tok.Val)) {
unescaped = true
}
result := ast.NewMustacheStatement(tok.Pos, tok.Line, unescaped)
// helperName param* hash?
result.Expression = p.parseExpression(tok)
// CLOSE | CLOSE_UNESCAPED
tokClose := p.shift()
if tokClose.Kind != closeToken {
errExpected(closeToken, tokClose)
}
result.Strip = ast.NewStrip(tok.Val, tokClose.Val)
return result
}
// partial : OPEN_PARTIAL partialName param* hash? CLOSE
func (p *parser) parsePartial() *ast.PartialStatement {
// OPEN_PARTIAL
tok := p.shift()
result := ast.NewPartialStatement(tok.Pos, tok.Line)
// partialName
result.Name = p.parsePartialName()
// param* hash?
result.Params, result.Hash = p.parseExpressionParamsHash()
// CLOSE
tokClose := p.shift()
if tokClose.Kind != lexer.TokenClose {
errExpected(lexer.TokenClose, tokClose)
}
result.Strip = ast.NewStrip(tok.Val, tokClose.Val)
return result
}
// helperName | sexpr
func (p *parser) parseHelperNameOrSexpr() ast.Node {
if p.isSexpr() {
// sexpr
return p.parseSexpr()
}
// helperName
return p.parseHelperName()
}
// param : helperName | sexpr
func (p *parser) parseParam() ast.Node {
return p.parseHelperNameOrSexpr()
}
// Returns true if next tokens represent a `param`
func (p *parser) isParam() bool {
return (p.isSexpr() || p.isHelperName()) && !p.isHashSegment()
}
// param*
func (p *parser) parseParams() []ast.Node {
var result []ast.Node
for p.isParam() {
result = append(result, p.parseParam())
}
return result
}
// sexpr : OPEN_SEXPR helperName param* hash? CLOSE_SEXPR
func (p *parser) parseSexpr() *ast.SubExpression {
// OPEN_SEXPR
tok := p.shift()
result := ast.NewSubExpression(tok.Pos, tok.Line)
// helperName param* hash?
result.Expression = p.parseExpression(tok)
// CLOSE_SEXPR
tok = p.shift()
if tok.Kind != lexer.TokenCloseSexpr {
errExpected(lexer.TokenCloseSexpr, tok)
}
return result
}
// hash : hashSegment+
func (p *parser) parseHash() *ast.Hash {
var pairs []*ast.HashPair
for p.isHashSegment() {
pairs = append(pairs, p.parseHashSegment())
}
firstLoc := pairs[0].Location()
result := ast.NewHash(firstLoc.Pos, firstLoc.Line)
result.Pairs = pairs
return result
}
// returns true if next tokens represents a `hashSegment`
func (p *parser) isHashSegment() bool {
return p.have(2) && (p.next().Kind == lexer.TokenID) && (p.nextAt(1).Kind == lexer.TokenEquals)
}
// hashSegment : ID EQUALS param
func (p *parser) parseHashSegment() *ast.HashPair {
// ID
tok := p.shift()
// EQUALS
p.shift()
// param
param := p.parseParam()
result := ast.NewHashPair(tok.Pos, tok.Line)
result.Key = tok.Val
result.Val = param
return result
}
// blockParams : OPEN_BLOCK_PARAMS ID+ CLOSE_BLOCK_PARAMS
func (p *parser) parseBlockParams() []string {
var result []string
// OPEN_BLOCK_PARAMS
tok := p.shift()
// ID+
for p.isID() {
result = append(result, p.shift().Val)
}
if len(result) == 0 {
errExpected(lexer.TokenID, p.next())
}
// CLOSE_BLOCK_PARAMS
tok = p.shift()
if tok.Kind != lexer.TokenCloseBlockParams {
errExpected(lexer.TokenCloseBlockParams, tok)
}
return result
}
// helperName : path | dataName | STRING | NUMBER | BOOLEAN | UNDEFINED | NULL
func (p *parser) parseHelperName() ast.Node {
var result ast.Node
tok := p.next()
switch tok.Kind {
case lexer.TokenBoolean:
// BOOLEAN
p.shift()
result = ast.NewBooleanLiteral(tok.Pos, tok.Line, (tok.Val == "true"), tok.Val)
case lexer.TokenNumber:
// NUMBER
p.shift()
val, isInt := parseNumber(tok)
result = ast.NewNumberLiteral(tok.Pos, tok.Line, val, isInt, tok.Val)
case lexer.TokenString:
// STRING
p.shift()
result = ast.NewStringLiteral(tok.Pos, tok.Line, tok.Val)
case lexer.TokenData:
// dataName
result = p.parseDataName()
default:
// path
result = p.parsePath(false)
}
return result
}
// parseNumber parses a number
func parseNumber(tok *lexer.Token) (result float64, isInt bool) {
var valInt int
var err error
valInt, err = strconv.Atoi(tok.Val)
if err == nil {
isInt = true
result = float64(valInt)
} else {
isInt = false
result, err = strconv.ParseFloat(tok.Val, 64)
if err != nil {
errToken(tok, fmt.Sprintf("Failed to parse number: %s", tok.Val))
}
}
// named returned values
return
}
// Returns true if next tokens represent a `helperName`
func (p *parser) isHelperName() bool {
switch p.next().Kind {
case lexer.TokenBoolean, lexer.TokenNumber, lexer.TokenString, lexer.TokenData, lexer.TokenID:
return true
}
return false
}
// partialName : helperName | sexpr
func (p *parser) parsePartialName() ast.Node {
return p.parseHelperNameOrSexpr()
}
// dataName : DATA pathSegments
func (p *parser) parseDataName() *ast.PathExpression {
// DATA
p.shift()
// pathSegments
return p.parsePath(true)
}
// path : pathSegments
// pathSegments : pathSegments SEP ID
//
// | ID
func (p *parser) parsePath(data bool) *ast.PathExpression {
var tok *lexer.Token
// ID
tok = p.shift()
if tok.Kind != lexer.TokenID {
errExpected(lexer.TokenID, tok)
}
result := ast.NewPathExpression(tok.Pos, tok.Line, data)
result.Part(tok.Val)
for p.isPathSep() {
// SEP
tok = p.shift()
result.Sep(tok.Val)
// ID
tok = p.shift()
if tok.Kind != lexer.TokenID {
errExpected(lexer.TokenID, tok)
}
result.Part(tok.Val)
if len(result.Parts) > 0 {
switch tok.Val {
case "..", ".", "this":
errToken(tok, "Invalid path: "+result.Original)
}
}
}
return result
}
// Ensures there is token to parse at given index
func (p *parser) ensure(index int) {
if p.lexOver {
// nothing more to grab
return
}
nb := index + 1
for len(p.tokens) < nb {
// fetch next token
tok := p.lex.NextToken()
// queue it
p.tokens = append(p.tokens, &tok)
if (tok.Kind == lexer.TokenEOF) || (tok.Kind == lexer.TokenError) {
p.lexOver = true
break
}
}
}
// have returns true is there are a list given number of tokens to consume left
func (p *parser) have(nb int) bool {
p.ensure(nb - 1)
return len(p.tokens) >= nb
}
// nextAt returns next token at given index, without consuming it
func (p *parser) nextAt(index int) *lexer.Token {
p.ensure(index)
return p.tokens[index]
}
// next returns next token without consuming it
func (p *parser) next() *lexer.Token {
return p.nextAt(0)
}
// shift returns next token and remove it from the tokens buffer
//
// Panics if next token is `TokenError`
func (p *parser) shift() *lexer.Token {
var result *lexer.Token
p.ensure(0)
result, p.tokens = p.tokens[0], p.tokens[1:]
// check error token
if result.Kind == lexer.TokenError {
errToken(result, "Lexer error")
}
return result
}
// isToken returns true if next token is of given type
func (p *parser) isToken(kind lexer.TokenKind) bool {
return p.have(1) && p.next().Kind == kind
}
// isSexpr returns true if next token starts a sexpr
func (p *parser) isSexpr() bool {
return p.isToken(lexer.TokenOpenSexpr)
}
// isPathSep returns true if next token is a path separator
func (p *parser) isPathSep() bool {
return p.isToken(lexer.TokenSep)
}
// isID returns true if next token is an ID
func (p *parser) isID() bool {
return p.isToken(lexer.TokenID)
}
// isBlockParams returns true if next token starts a block params
func (p *parser) isBlockParams() bool {
return p.isToken(lexer.TokenOpenBlockParams)
}
// isInverse returns true if next token starts an INVERSE sequence
func (p *parser) isInverse() bool {
return p.isToken(lexer.TokenInverse)
}
// isOpenInverseChain returns true if next token is OPEN_INVERSE_CHAIN
func (p *parser) isOpenInverseChain() bool {
return p.isToken(lexer.TokenOpenInverseChain)
}