460 lines
14 KiB
Go
460 lines
14 KiB
Go
// Copyright 2013 The Go Authors. All rights reserved.
|
||
// Use of this source code is governed by a BSD-style
|
||
// license that can be found in the LICENSE file.
|
||
|
||
// Package rta provides Rapid Type Analysis (RTA) for Go, a fast
|
||
// algorithm for call graph construction and discovery of reachable code
|
||
// (and hence dead code) and runtime types. The algorithm was first
|
||
// described in:
|
||
//
|
||
// David F. Bacon and Peter F. Sweeney. 1996.
|
||
// Fast static analysis of C++ virtual function calls. (OOPSLA '96)
|
||
// http://doi.acm.org/10.1145/236337.236371
|
||
//
|
||
// The algorithm uses dynamic programming to tabulate the cross-product
|
||
// of the set of known "address taken" functions with the set of known
|
||
// dynamic calls of the same type. As each new address-taken function
|
||
// is discovered, call graph edges are added from each known callsite,
|
||
// and as each new call site is discovered, call graph edges are added
|
||
// from it to each known address-taken function.
|
||
//
|
||
// A similar approach is used for dynamic calls via interfaces: it
|
||
// tabulates the cross-product of the set of known "runtime types",
|
||
// i.e. types that may appear in an interface value, or be derived from
|
||
// one via reflection, with the set of known "invoke"-mode dynamic
|
||
// calls. As each new "runtime type" is discovered, call edges are
|
||
// added from the known call sites, and as each new call site is
|
||
// discovered, call graph edges are added to each compatible
|
||
// method.
|
||
//
|
||
// In addition, we must consider all exported methods of any runtime type
|
||
// as reachable, since they may be called via reflection.
|
||
//
|
||
// Each time a newly added call edge causes a new function to become
|
||
// reachable, the code of that function is analyzed for more call sites,
|
||
// address-taken functions, and runtime types. The process continues
|
||
// until a fixed point is achieved.
|
||
//
|
||
// The resulting call graph is less precise than one produced by pointer
|
||
// analysis, but the algorithm is much faster. For example, running the
|
||
// cmd/callgraph tool on its own source takes ~2.1s for RTA and ~5.4s
|
||
// for points-to analysis.
|
||
//
|
||
package rta // import "github.com/golangci/tools/go/callgraph/rta"
|
||
|
||
// TODO(adonovan): test it by connecting it to the interpreter and
|
||
// replacing all "unreachable" functions by a special intrinsic, and
|
||
// ensure that that intrinsic is never called.
|
||
|
||
import (
|
||
"fmt"
|
||
"go/types"
|
||
|
||
"github.com/golangci/tools/go/callgraph"
|
||
"github.com/golangci/tools/go/ssa"
|
||
"golang.org/x/tools/go/types/typeutil"
|
||
)
|
||
|
||
// A Result holds the results of Rapid Type Analysis, which includes the
|
||
// set of reachable functions/methods, runtime types, and the call graph.
|
||
//
|
||
type Result struct {
|
||
// CallGraph is the discovered callgraph.
|
||
// It does not include edges for calls made via reflection.
|
||
CallGraph *callgraph.Graph
|
||
|
||
// Reachable contains the set of reachable functions and methods.
|
||
// This includes exported methods of runtime types, since
|
||
// they may be accessed via reflection.
|
||
// The value indicates whether the function is address-taken.
|
||
//
|
||
// (We wrap the bool in a struct to avoid inadvertent use of
|
||
// "if Reachable[f] {" to test for set membership.)
|
||
Reachable map[*ssa.Function]struct{ AddrTaken bool }
|
||
|
||
// RuntimeTypes contains the set of types that are needed at
|
||
// runtime, for interfaces or reflection.
|
||
//
|
||
// The value indicates whether the type is inaccessible to reflection.
|
||
// Consider:
|
||
// type A struct{B}
|
||
// fmt.Println(new(A))
|
||
// Types *A, A and B are accessible to reflection, but the unnamed
|
||
// type struct{B} is not.
|
||
RuntimeTypes typeutil.Map
|
||
}
|
||
|
||
// Working state of the RTA algorithm.
|
||
type rta struct {
|
||
result *Result
|
||
|
||
prog *ssa.Program
|
||
|
||
worklist []*ssa.Function // list of functions to visit
|
||
|
||
// addrTakenFuncsBySig contains all address-taken *Functions, grouped by signature.
|
||
// Keys are *types.Signature, values are map[*ssa.Function]bool sets.
|
||
addrTakenFuncsBySig typeutil.Map
|
||
|
||
// dynCallSites contains all dynamic "call"-mode call sites, grouped by signature.
|
||
// Keys are *types.Signature, values are unordered []ssa.CallInstruction.
|
||
dynCallSites typeutil.Map
|
||
|
||
// invokeSites contains all "invoke"-mode call sites, grouped by interface.
|
||
// Keys are *types.Interface (never *types.Named),
|
||
// Values are unordered []ssa.CallInstruction sets.
|
||
invokeSites typeutil.Map
|
||
|
||
// The following two maps together define the subset of the
|
||
// m:n "implements" relation needed by the algorithm.
|
||
|
||
// concreteTypes maps each concrete type to the set of interfaces that it implements.
|
||
// Keys are types.Type, values are unordered []*types.Interface.
|
||
// Only concrete types used as MakeInterface operands are included.
|
||
concreteTypes typeutil.Map
|
||
|
||
// interfaceTypes maps each interface type to
|
||
// the set of concrete types that implement it.
|
||
// Keys are *types.Interface, values are unordered []types.Type.
|
||
// Only interfaces used in "invoke"-mode CallInstructions are included.
|
||
interfaceTypes typeutil.Map
|
||
}
|
||
|
||
// addReachable marks a function as potentially callable at run-time,
|
||
// and ensures that it gets processed.
|
||
func (r *rta) addReachable(f *ssa.Function, addrTaken bool) {
|
||
reachable := r.result.Reachable
|
||
n := len(reachable)
|
||
v := reachable[f]
|
||
if addrTaken {
|
||
v.AddrTaken = true
|
||
}
|
||
reachable[f] = v
|
||
if len(reachable) > n {
|
||
// First time seeing f. Add it to the worklist.
|
||
r.worklist = append(r.worklist, f)
|
||
}
|
||
}
|
||
|
||
// addEdge adds the specified call graph edge, and marks it reachable.
|
||
// addrTaken indicates whether to mark the callee as "address-taken".
|
||
func (r *rta) addEdge(site ssa.CallInstruction, callee *ssa.Function, addrTaken bool) {
|
||
r.addReachable(callee, addrTaken)
|
||
|
||
if g := r.result.CallGraph; g != nil {
|
||
if site.Parent() == nil {
|
||
panic(site)
|
||
}
|
||
from := g.CreateNode(site.Parent())
|
||
to := g.CreateNode(callee)
|
||
callgraph.AddEdge(from, site, to)
|
||
}
|
||
}
|
||
|
||
// ---------- addrTakenFuncs × dynCallSites ----------
|
||
|
||
// visitAddrTakenFunc is called each time we encounter an address-taken function f.
|
||
func (r *rta) visitAddrTakenFunc(f *ssa.Function) {
|
||
// Create two-level map (Signature -> Function -> bool).
|
||
S := f.Signature
|
||
funcs, _ := r.addrTakenFuncsBySig.At(S).(map[*ssa.Function]bool)
|
||
if funcs == nil {
|
||
funcs = make(map[*ssa.Function]bool)
|
||
r.addrTakenFuncsBySig.Set(S, funcs)
|
||
}
|
||
if !funcs[f] {
|
||
// First time seeing f.
|
||
funcs[f] = true
|
||
|
||
// If we've seen any dyncalls of this type, mark it reachable,
|
||
// and add call graph edges.
|
||
sites, _ := r.dynCallSites.At(S).([]ssa.CallInstruction)
|
||
for _, site := range sites {
|
||
r.addEdge(site, f, true)
|
||
}
|
||
}
|
||
}
|
||
|
||
// visitDynCall is called each time we encounter a dynamic "call"-mode call.
|
||
func (r *rta) visitDynCall(site ssa.CallInstruction) {
|
||
S := site.Common().Signature()
|
||
|
||
// Record the call site.
|
||
sites, _ := r.dynCallSites.At(S).([]ssa.CallInstruction)
|
||
r.dynCallSites.Set(S, append(sites, site))
|
||
|
||
// For each function of signature S that we know is address-taken,
|
||
// mark it reachable. We'll add the callgraph edges later.
|
||
funcs, _ := r.addrTakenFuncsBySig.At(S).(map[*ssa.Function]bool)
|
||
for g := range funcs {
|
||
r.addEdge(site, g, true)
|
||
}
|
||
}
|
||
|
||
// ---------- concrete types × invoke sites ----------
|
||
|
||
// addInvokeEdge is called for each new pair (site, C) in the matrix.
|
||
func (r *rta) addInvokeEdge(site ssa.CallInstruction, C types.Type) {
|
||
// Ascertain the concrete method of C to be called.
|
||
imethod := site.Common().Method
|
||
cmethod := r.prog.MethodValue(r.prog.MethodSets.MethodSet(C).Lookup(imethod.Pkg(), imethod.Name()))
|
||
r.addEdge(site, cmethod, true)
|
||
}
|
||
|
||
// visitInvoke is called each time the algorithm encounters an "invoke"-mode call.
|
||
func (r *rta) visitInvoke(site ssa.CallInstruction) {
|
||
I := site.Common().Value.Type().Underlying().(*types.Interface)
|
||
|
||
// Record the invoke site.
|
||
sites, _ := r.invokeSites.At(I).([]ssa.CallInstruction)
|
||
r.invokeSites.Set(I, append(sites, site))
|
||
|
||
// Add callgraph edge for each existing
|
||
// address-taken concrete type implementing I.
|
||
for _, C := range r.implementations(I) {
|
||
r.addInvokeEdge(site, C)
|
||
}
|
||
}
|
||
|
||
// ---------- main algorithm ----------
|
||
|
||
// visitFunc processes function f.
|
||
func (r *rta) visitFunc(f *ssa.Function) {
|
||
var space [32]*ssa.Value // preallocate space for common case
|
||
|
||
for _, b := range f.Blocks {
|
||
for _, instr := range b.Instrs {
|
||
rands := instr.Operands(space[:0])
|
||
|
||
switch instr := instr.(type) {
|
||
case ssa.CallInstruction:
|
||
call := instr.Common()
|
||
if call.IsInvoke() {
|
||
r.visitInvoke(instr)
|
||
} else if g := call.StaticCallee(); g != nil {
|
||
r.addEdge(instr, g, false)
|
||
} else if _, ok := call.Value.(*ssa.Builtin); !ok {
|
||
r.visitDynCall(instr)
|
||
}
|
||
|
||
// Ignore the call-position operand when
|
||
// looking for address-taken Functions.
|
||
// Hack: assume this is rands[0].
|
||
rands = rands[1:]
|
||
|
||
case *ssa.MakeInterface:
|
||
r.addRuntimeType(instr.X.Type(), false)
|
||
}
|
||
|
||
// Process all address-taken functions.
|
||
for _, op := range rands {
|
||
if g, ok := (*op).(*ssa.Function); ok {
|
||
r.visitAddrTakenFunc(g)
|
||
}
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
// Analyze performs Rapid Type Analysis, starting at the specified root
|
||
// functions. It returns nil if no roots were specified.
|
||
//
|
||
// If buildCallGraph is true, Result.CallGraph will contain a call
|
||
// graph; otherwise, only the other fields (reachable functions) are
|
||
// populated.
|
||
//
|
||
func Analyze(roots []*ssa.Function, buildCallGraph bool) *Result {
|
||
if len(roots) == 0 {
|
||
return nil
|
||
}
|
||
|
||
r := &rta{
|
||
result: &Result{Reachable: make(map[*ssa.Function]struct{ AddrTaken bool })},
|
||
prog: roots[0].Prog,
|
||
}
|
||
|
||
if buildCallGraph {
|
||
// TODO(adonovan): change callgraph API to eliminate the
|
||
// notion of a distinguished root node. Some callgraphs
|
||
// have many roots, or none.
|
||
r.result.CallGraph = callgraph.New(roots[0])
|
||
}
|
||
|
||
hasher := typeutil.MakeHasher()
|
||
r.result.RuntimeTypes.SetHasher(hasher)
|
||
r.addrTakenFuncsBySig.SetHasher(hasher)
|
||
r.dynCallSites.SetHasher(hasher)
|
||
r.invokeSites.SetHasher(hasher)
|
||
r.concreteTypes.SetHasher(hasher)
|
||
r.interfaceTypes.SetHasher(hasher)
|
||
|
||
// Visit functions, processing their instructions, and adding
|
||
// new functions to the worklist, until a fixed point is
|
||
// reached.
|
||
var shadow []*ssa.Function // for efficiency, we double-buffer the worklist
|
||
r.worklist = append(r.worklist, roots...)
|
||
for len(r.worklist) > 0 {
|
||
shadow, r.worklist = r.worklist, shadow[:0]
|
||
for _, f := range shadow {
|
||
r.visitFunc(f)
|
||
}
|
||
}
|
||
return r.result
|
||
}
|
||
|
||
// interfaces(C) returns all currently known interfaces implemented by C.
|
||
func (r *rta) interfaces(C types.Type) []*types.Interface {
|
||
// Ascertain set of interfaces C implements
|
||
// and update 'implements' relation.
|
||
var ifaces []*types.Interface
|
||
r.interfaceTypes.Iterate(func(I types.Type, concs interface{}) {
|
||
if I := I.(*types.Interface); types.Implements(C, I) {
|
||
concs, _ := concs.([]types.Type)
|
||
r.interfaceTypes.Set(I, append(concs, C))
|
||
ifaces = append(ifaces, I)
|
||
}
|
||
})
|
||
r.concreteTypes.Set(C, ifaces)
|
||
return ifaces
|
||
}
|
||
|
||
// implementations(I) returns all currently known concrete types that implement I.
|
||
func (r *rta) implementations(I *types.Interface) []types.Type {
|
||
var concs []types.Type
|
||
if v := r.interfaceTypes.At(I); v != nil {
|
||
concs = v.([]types.Type)
|
||
} else {
|
||
// First time seeing this interface.
|
||
// Update the 'implements' relation.
|
||
r.concreteTypes.Iterate(func(C types.Type, ifaces interface{}) {
|
||
if types.Implements(C, I) {
|
||
ifaces, _ := ifaces.([]*types.Interface)
|
||
r.concreteTypes.Set(C, append(ifaces, I))
|
||
concs = append(concs, C)
|
||
}
|
||
})
|
||
r.interfaceTypes.Set(I, concs)
|
||
}
|
||
return concs
|
||
}
|
||
|
||
// addRuntimeType is called for each concrete type that can be the
|
||
// dynamic type of some interface or reflect.Value.
|
||
// Adapted from needMethods in go/ssa/builder.go
|
||
//
|
||
func (r *rta) addRuntimeType(T types.Type, skip bool) {
|
||
if prev, ok := r.result.RuntimeTypes.At(T).(bool); ok {
|
||
if skip && !prev {
|
||
r.result.RuntimeTypes.Set(T, skip)
|
||
}
|
||
return
|
||
}
|
||
r.result.RuntimeTypes.Set(T, skip)
|
||
|
||
mset := r.prog.MethodSets.MethodSet(T)
|
||
|
||
if _, ok := T.Underlying().(*types.Interface); !ok {
|
||
// T is a new concrete type.
|
||
for i, n := 0, mset.Len(); i < n; i++ {
|
||
sel := mset.At(i)
|
||
m := sel.Obj()
|
||
|
||
if m.Exported() {
|
||
// Exported methods are always potentially callable via reflection.
|
||
r.addReachable(r.prog.MethodValue(sel), true)
|
||
}
|
||
}
|
||
|
||
// Add callgraph edge for each existing dynamic
|
||
// "invoke"-mode call via that interface.
|
||
for _, I := range r.interfaces(T) {
|
||
sites, _ := r.invokeSites.At(I).([]ssa.CallInstruction)
|
||
for _, site := range sites {
|
||
r.addInvokeEdge(site, T)
|
||
}
|
||
}
|
||
}
|
||
|
||
// Precondition: T is not a method signature (*Signature with Recv()!=nil).
|
||
// Recursive case: skip => don't call makeMethods(T).
|
||
// Each package maintains its own set of types it has visited.
|
||
|
||
var n *types.Named
|
||
switch T := T.(type) {
|
||
case *types.Named:
|
||
n = T
|
||
case *types.Pointer:
|
||
n, _ = T.Elem().(*types.Named)
|
||
}
|
||
if n != nil {
|
||
owner := n.Obj().Pkg()
|
||
if owner == nil {
|
||
return // built-in error type
|
||
}
|
||
}
|
||
|
||
// Recursion over signatures of each exported method.
|
||
for i := 0; i < mset.Len(); i++ {
|
||
if mset.At(i).Obj().Exported() {
|
||
sig := mset.At(i).Type().(*types.Signature)
|
||
r.addRuntimeType(sig.Params(), true) // skip the Tuple itself
|
||
r.addRuntimeType(sig.Results(), true) // skip the Tuple itself
|
||
}
|
||
}
|
||
|
||
switch t := T.(type) {
|
||
case *types.Basic:
|
||
// nop
|
||
|
||
case *types.Interface:
|
||
// nop---handled by recursion over method set.
|
||
|
||
case *types.Pointer:
|
||
r.addRuntimeType(t.Elem(), false)
|
||
|
||
case *types.Slice:
|
||
r.addRuntimeType(t.Elem(), false)
|
||
|
||
case *types.Chan:
|
||
r.addRuntimeType(t.Elem(), false)
|
||
|
||
case *types.Map:
|
||
r.addRuntimeType(t.Key(), false)
|
||
r.addRuntimeType(t.Elem(), false)
|
||
|
||
case *types.Signature:
|
||
if t.Recv() != nil {
|
||
panic(fmt.Sprintf("Signature %s has Recv %s", t, t.Recv()))
|
||
}
|
||
r.addRuntimeType(t.Params(), true) // skip the Tuple itself
|
||
r.addRuntimeType(t.Results(), true) // skip the Tuple itself
|
||
|
||
case *types.Named:
|
||
// A pointer-to-named type can be derived from a named
|
||
// type via reflection. It may have methods too.
|
||
r.addRuntimeType(types.NewPointer(T), false)
|
||
|
||
// Consider 'type T struct{S}' where S has methods.
|
||
// Reflection provides no way to get from T to struct{S},
|
||
// only to S, so the method set of struct{S} is unwanted,
|
||
// so set 'skip' flag during recursion.
|
||
r.addRuntimeType(t.Underlying(), true)
|
||
|
||
case *types.Array:
|
||
r.addRuntimeType(t.Elem(), false)
|
||
|
||
case *types.Struct:
|
||
for i, n := 0, t.NumFields(); i < n; i++ {
|
||
r.addRuntimeType(t.Field(i).Type(), false)
|
||
}
|
||
|
||
case *types.Tuple:
|
||
for i, n := 0, t.Len(); i < n; i++ {
|
||
r.addRuntimeType(t.At(i).Type(), false)
|
||
}
|
||
|
||
default:
|
||
panic(T)
|
||
}
|
||
}
|