// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package inlheur

import (
	"cmd/compile/internal/ir"
	"cmd/compile/internal/pgoir"
	"cmd/compile/internal/typecheck"
	"fmt"
	"os"
	"strings"
)

type callSiteAnalyzer struct {
	fn *ir.Func
	*nameFinder
}

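// callSiteTableBuilder is a helper used to walk a function (or an
// inlined call region within a function) and build up a table of the
// call sites it contains. During the walk, nstack holds the stack of
// enclosing IR nodes, loopNest tracks how many enclosing "real" loops
// the walk is currently within, ptab records statements already known
// to lead to panic/exit, and isInit records whether the function
// being analyzed is an init function.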
type callSiteTableBuilder struct {
	fn *ir.Func
	*nameFinder
	cstab    CallSiteTab
	ptab     map[ir.Node]pstate
	nstack   []ir.Node
	loopNest int
	isInit   bool
}

func makeCallSiteAnalyzer(fn *ir.Func) *callSiteAnalyzer {
	return &callSiteAnalyzer{
		fn:         fn,
		nameFinder: newNameFinder(fn),
	}
}

func makeCallSiteTableBuilder(fn *ir.Func, cstab CallSiteTab, ptab map[ir.Node]pstate, loopNestingLevel int, nf *nameFinder) *callSiteTableBuilder {
	isInit := fn.IsPackageInit() || strings.HasPrefix(fn.Sym().Name, "init.")
	return &callSiteTableBuilder{
		fn:         fn,
		cstab:      cstab,
		ptab:       ptab,
		isInit:     isInit,
		loopNest:   loopNestingLevel,
		nstack:     []ir.Node{fn},
		nameFinder: nf,
	}
}

// computeCallSiteTable builds and returns a table of call sites for
// the specified region in function fn. A region here corresponds to a
// specific subtree within the AST for a function. The main intended
// use cases are for 'region' to be either A) an entire function body,
// or B) an inlined call expression.
func computeCallSiteTable(fn *ir.Func, region ir.Nodes, cstab CallSiteTab, ptab map[ir.Node]pstate, loopNestingLevel int, nf *nameFinder) CallSiteTab {
	cstb := makeCallSiteTableBuilder(fn, cstab, ptab, loopNestingLevel, nf)
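	// Walk the region depth first, invoking the builder's pre- and
	// post-visit hooks at each node so that it can maintain its node
	// stack and loop nesting count as it goes.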
	var doNode func(ir.Node) bool
	doNode = func(n ir.Node) bool {
		cstb.nodeVisitPre(n)
		ir.DoChildren(n, doNode)
		cstb.nodeVisitPost(n)
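		// Returning false here tells ir.DoChildren to keep walking;
		// the traversal is never cut short.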
		return false
	}
	for _, n := range region {
		doNode(n)
	}
	return cstb.cstab
}

func (cstb *callSiteTableBuilder) flagsForNode(call *ir.CallExpr) CSPropBits {
	var r CSPropBits

	if debugTrace&debugTraceCalls != 0 {
		fmt.Fprintf(os.Stderr, "=-= analyzing call at %s\n",
			fmtFullPos(call.Pos()))
	}

	// Set a bit if this call is within a loop.
	if cstb.loopNest > 0 {
		r |= CallSiteInLoop
	}

	// Set a bit if the call is within an init function (either
	// compiler-generated or user-written).
	if cstb.isInit {
		r |= CallSiteInInitFunc
	}

	// Decide whether to apply the panic path heuristic. Hack: don't
	// apply this heuristic in the function "main.main" (mostly just
	// to avoid annoying users).
	if !isMainMain(cstb.fn) {
		r = cstb.determinePanicPathBits(call, r)
	}

	return r
}

// determinePanicPathBits updates the CallSiteOnPanicPath bit within
// "r" if we think this call is on an unconditional path to
// panic/exit. Do this by walking back up the node stack to see if we
// can find either A) an enclosing panic, or B) a statement node that
// we've determined leads to a panic/exit.
func (cstb *callSiteTableBuilder) determinePanicPathBits(call ir.Node, r CSPropBits) CSPropBits {
	cstb.nstack = append(cstb.nstack, call)
	defer func() {
		cstb.nstack = cstb.nstack[:len(cstb.nstack)-1]
	}()

	for ri := range cstb.nstack[:len(cstb.nstack)-1] {
		i := len(cstb.nstack) - ri - 1
		n := cstb.nstack[i]
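		// A call expression can also appear in statement position;
		// treat it as an expression only here, so that just true
		// statement nodes can terminate the search below.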
		_, isCallExpr := n.(*ir.CallExpr)
		_, isStmt := n.(ir.Stmt)
		if isCallExpr {
			isStmt = false
		}

		if debugTrace&debugTraceCalls != 0 {
			ps, inps := cstb.ptab[n]
			fmt.Fprintf(os.Stderr, "=-= callpar %d op=%s ps=%s inptab=%v stmt=%v\n", i, n.Op().String(), ps.String(), inps, isStmt)
		}

		if n.Op() == ir.OPANIC {
			r |= CallSiteOnPanicPath
			break
		}
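		// An enclosing node already known to lead to a panic/exit
		// also puts this call on a panic path; a statement with any
		// other recorded disposition means the call isn't
		// unconditionally headed for a panic, so stop looking.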
		if v, ok := cstb.ptab[n]; ok {
			if v == psCallsPanic {
				r |= CallSiteOnPanicPath
				break
			}
			if isStmt {
				break
			}
		}
	}
	return r
}

// propsForArg returns property bits for a given call argument expression arg.
func (cstb *callSiteTableBuilder) propsForArg(arg ir.Node) ActualExprPropBits {
	if cval := cstb.constValue(arg); cval != nil {
		return ActualExprConstant
	}
	if cstb.isConcreteConvIface(arg) {
		return ActualExprIsConcreteConvIface
	}
	fname := cstb.funcName(arg)
	if fname != nil {
		if fn := fname.Func; fn != nil && typecheck.HaveInlineBody(fn) {
			return ActualExprIsInlinableFunc
		}
		return ActualExprIsFunc
	}
	return 0
}

// argPropsForCall returns a slice of argument properties for the
// expressions being passed to the callee in the specific call
// expression; these will be stored in the CallSite object for a given
// call and then consulted when scoring. If no arg has any interesting
// properties we try to save some space and return a nil slice.
func (cstb *callSiteTableBuilder) argPropsForCall(ce *ir.CallExpr) []ActualExprPropBits {
	rv := make([]ActualExprPropBits, len(ce.Args))
	somethingInteresting := false
	for idx := range ce.Args {
		argProp := cstb.propsForArg(ce.Args[idx])
		somethingInteresting = somethingInteresting || (argProp != 0)
		rv[idx] = argProp
	}
	if !somethingInteresting {
		return nil
	}
	return rv
}

func (cstb *callSiteTableBuilder) addCallSite(callee *ir.Func, call *ir.CallExpr) {
	flags := cstb.flagsForNode(call)
	argProps := cstb.argPropsForCall(call)
	if debugTrace&debugTraceCalls != 0 {
		fmt.Fprintf(os.Stderr, "=-= props %+v for call %v\n", argProps, call)
	}
	// FIXME: maybe bulk-allocate these?
	cs := &CallSite{
		Call:     call,
		Callee:   callee,
		Assign:   cstb.containingAssignment(call),
		ArgProps: argProps,
		Flags:    flags,
		ID:       uint(len(cstb.cstab)),
	}
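	// Each call expression should be entered into the table at most
	// once; a duplicate entry indicates a bug in the walk.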
	if _, ok := cstb.cstab[call]; ok {
		fmt.Fprintf(os.Stderr, "*** cstab duplicate entry at: %s\n",
			fmtFullPos(call.Pos()))
		fmt.Fprintf(os.Stderr, "*** call: %+v\n", call)
		panic("bad")
	}
	// Set initial score for callsite to the cost computed
	// by CanInline; this score will be refined later based
	// on heuristics.
	cs.Score = int(callee.Inl.Cost)

	if cstb.cstab == nil {
		cstb.cstab = make(CallSiteTab)
	}
	cstb.cstab[call] = cs
	if debugTrace&debugTraceCalls != 0 {
		fmt.Fprintf(os.Stderr, "=-= added callsite: caller=%v callee=%v n=%s\n",
			cstb.fn, callee, fmtFullPos(call.Pos()))
	}
}

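// nodeVisitPre is the pre-order hook for the region walk: it bumps
// the loop nesting level when entering a construct that looks like a
// real loop, records a call site for each direct call to an inline
// candidate, and pushes the visited node onto the node stack.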
func (cstb *callSiteTableBuilder) nodeVisitPre(n ir.Node) {
	switch n.Op() {
	case ir.ORANGE, ir.OFOR:
		if !hasTopLevelLoopBodyReturnOrBreak(loopBody(n)) {
			cstb.loopNest++
		}
	case ir.OCALLFUNC:
		ce := n.(*ir.CallExpr)
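		// Record a call site only for direct calls whose callee has
		// an inline body available.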
		callee := pgoir.DirectCallee(ce.Fun)
		if callee != nil && callee.Inl != nil {
			cstb.addCallSite(callee, ce)
		}
	}
	cstb.nstack = append(cstb.nstack, n)
}

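// nodeVisitPost is the post-order hook for the region walk: it pops
// the node stack and undoes any loop-nest increment applied by
// nodeVisitPre.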
func (cstb *callSiteTableBuilder) nodeVisitPost(n ir.Node) {
	cstb.nstack = cstb.nstack[:len(cstb.nstack)-1]
	switch n.Op() {
	case ir.ORANGE, ir.OFOR:
		if !hasTopLevelLoopBodyReturnOrBreak(loopBody(n)) {
			cstb.loopNest--
		}
	}
}

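// loopBody returns the body of the "for" or "range" statement n, or
// nil if n is not a loop.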
func loopBody(n ir.Node) ir.Nodes {
	if forst, ok := n.(*ir.ForStmt); ok {
		return forst.Body
	}
	if rst, ok := n.(*ir.RangeStmt); ok {
		return rst.Body
	}
	return nil
}

// hasTopLevelLoopBodyReturnOrBreak examines the body of a "for" or
// "range" loop to try to verify that it is a real loop, as opposed to
// a construct that is syntactically loopy but doesn't actually iterate
// multiple times, like:
//
//	for {
//		blah()
//		return 1
//	}
//
// [Remark: the pattern above crops up quite a bit in the source code
// for the compiler itself, e.g. the auto-generated rewrite code]
//
// Note that we don't look for GOTO statements here, so it's possible
// we'll get the wrong result for a loop with complicated control
// jumps via gotos.
func hasTopLevelLoopBodyReturnOrBreak(loopBody ir.Nodes) bool {
	for _, n := range loopBody {
		if n.Op() == ir.ORETURN || n.Op() == ir.OBREAK {
			return true
		}
	}
	return false
}

// containingAssignment returns the top-level assignment statement
// for a statement level function call "n". Examples:
//
//	x := foo()
//	x, y := bar(z, baz())
//	if blah() { ...
//
// Here the top-level assignment statement for the foo() call is the
// statement assigning to "x"; the top-level assignment for the "bar()"
// call is the assignment to x,y. For the baz() and blah() calls,
// there is no top level assignment statement.
//
// The unstated goal here is that we want to use the containing
// assignment to establish a connection between a given call and the
// variables to which its results/returns are being assigned.
//
// Note that for the "bar" call above, the front end sometimes
// decomposes this into two assignments, the first one assigning the
// call to a pair of auto-temps, then the second one assigning the
// auto-temps to the user-visible vars. This helper will return the
// second (outer) of these two.
func (cstb *callSiteTableBuilder) containingAssignment(n ir.Node) ir.Node {
	parent := cstb.nstack[len(cstb.nstack)-1]

	// assignsOnlyAutoTemps returns TRUE if the specified OAS2FUNC
	// node assigns only auto-temps.
	assignsOnlyAutoTemps := func(x ir.Node) bool {
		alst := x.(*ir.AssignListStmt)
		oa2init := alst.Init()
		if len(oa2init) == 0 {
			return false
		}
		for _, v := range oa2init {
			d := v.(*ir.Decl)
			if !ir.IsAutoTmp(d.X) {
				return false
			}
		}
		return true
	}

	// Simple case: x := foo()
	if parent.Op() == ir.OAS {
		return parent
	}

	// Multi-return case: x, y := bar()
	if parent.Op() == ir.OAS2FUNC {
		// Hack city: if the result vars are auto-temps, try looking
		// for an outer assignment in the tree. The code shape we're
		// looking for here is:
		//
		//	OAS1({x,y},OCONVNOP(OAS2FUNC({auto1,auto2},OCALLFUNC(bar))))
		//
		if assignsOnlyAutoTemps(parent) {
			par2 := cstb.nstack[len(cstb.nstack)-2]
			if par2.Op() == ir.OAS2 {
				return par2
			}
			if par2.Op() == ir.OCONVNOP {
				par3 := cstb.nstack[len(cstb.nstack)-3]
				if par3.Op() == ir.OAS2 {
					return par3
				}
			}
		}
	}

	return nil
}

// UpdateCallsiteTable handles updating of callerfn's call site table
// after an inline has been carried out, e.g. the call at 'n' has been
// turned into the inlined call expression 'ic' within function
// callerfn. The chief thing of interest here is to make sure that any
// call nodes within 'ic' are added to the call site table for
// 'callerfn' and scored appropriately.
func UpdateCallsiteTable(callerfn *ir.Func, n *ir.CallExpr, ic *ir.InlinedCallExpr) {
	enableDebugTraceIfEnv()
	defer disableDebugTrace()

	funcInlHeur, ok := fpmap[callerfn]
	if !ok {
		// This can happen for compiler-generated wrappers.
		if debugTrace&debugTraceCalls != 0 {
			fmt.Fprintf(os.Stderr, "=-= early exit, no entry for caller fn %v\n", callerfn)
		}
		return
	}

	if debugTrace&debugTraceCalls != 0 {
		fmt.Fprintf(os.Stderr, "=-= UpdateCallsiteTable(caller=%v, cs=%s)\n",
			callerfn, fmtFullPos(n.Pos()))
	}

	// Mark the call in question as inlined.
	oldcs, ok := funcInlHeur.cstab[n]
	if !ok {
		// This can happen for compiler-generated wrappers.
		return
	}
	oldcs.aux |= csAuxInlined

	if debugTrace&debugTraceCalls != 0 {
		fmt.Fprintf(os.Stderr, "=-= marked as inlined: callee=%v %s\n",
			oldcs.Callee, EncodeCallSiteKey(oldcs))
	}

	// Walk the inlined call region to collect new callsites.
	var icp pstate
	if oldcs.Flags&CallSiteOnPanicPath != 0 {
		icp = psCallsPanic
	}
	var loopNestLevel int
	if oldcs.Flags&CallSiteInLoop != 0 {
		loopNestLevel = 1
	}
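	// Seed the walk of the inlined body with the properties of the
	// call that was just inlined: if that call site sat on a panic
	// path or inside a loop, the calls within 'ic' inherit that
	// context.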
	ptab := map[ir.Node]pstate{ic: icp}
	nf := newNameFinder(nil)
	icstab := computeCallSiteTable(callerfn, ic.Body, nil, ptab, loopNestLevel, nf)

	// Record parent callsite. This is primarily for debug output.
	for _, cs := range icstab {
		cs.parent = oldcs
	}

	// Score the calls in the inlined body. Note the setting of
	// "doCallResults" to false here: at the moment there isn't any
	// easy way to localize or region-ize the work done by
	// "rescoreBasedOnCallResultUses", which currently does a walk
	// over the entire function to look for uses of a given set of
	// results. Similarly we're passing nil to makeCallSiteAnalyzer,
	// so as to run name finding without the use of static value &
	// friends.
	csa := makeCallSiteAnalyzer(nil)
	const doCallResults = false
	csa.scoreCallsRegion(callerfn, ic.Body, icstab, doCallResults, ic)
}