Files
llgo/internal/build/size_report.go
2025-11-17 15:31:34 +08:00

616 lines
16 KiB
Go

package build
import (
"bufio"
"bytes"
"encoding/json"
"fmt"
"io"
"math"
"os"
"path/filepath"
"sort"
"strconv"
"strings"
"github.com/goplus/llgo/xtool/env/llvm"
)
type sectionKind int
const (
sectionUnknown sectionKind = iota
sectionText
sectionROData
sectionData
sectionBSS
)
const (
// readelfInitialBuffer is the initial buffer size for reading readelf output.
// Most lines in readelf output are less than 1KB.
readelfInitialBuffer = 64 * 1024
// readelfMaxBuffer is the maximum buffer size to handle very long symbol names
// or section dumps. Reduced from 64MB to prevent excessive memory consumption
// while still accommodating reasonably large binaries.
readelfMaxBuffer = 4 * 1024 * 1024
)
type sectionInfo struct {
Index int
Name string
Segment string
Address uint64
Size uint64
Kind sectionKind
}
type symbolInfo struct {
Name string
SectionIndex int
Address uint64
}
type readelfData struct {
sections map[int]*sectionInfo
symbols map[int][]symbolInfo
}
type moduleSize struct {
Name string
Code uint64
ROData uint64
Data uint64
BSS uint64
}
func (m *moduleSize) Flash() uint64 {
return m.Code + m.ROData + m.Data
}
func (m *moduleSize) RAM() uint64 {
return m.Data + m.BSS
}
type sizeReport struct {
Binary string
Modules map[string]*moduleSize
Total moduleSize
}
func (r *sizeReport) module(name string) *moduleSize {
if name == "" {
name = "(anonymous)"
}
if r.Modules == nil {
r.Modules = make(map[string]*moduleSize)
}
m, ok := r.Modules[name]
if !ok {
m = &moduleSize{Name: name}
r.Modules[name] = m
}
return m
}
func (r *sizeReport) add(name string, kind sectionKind, size uint64) {
if size == 0 {
return
}
m := r.module(name)
switch kind {
case sectionText:
m.Code += size
r.Total.Code += size
case sectionROData:
m.ROData += size
r.Total.ROData += size
case sectionData:
m.Data += size
r.Total.Data += size
case sectionBSS:
m.BSS += size
r.Total.BSS += size
}
}
func reportBinarySize(path, format, level string, pkgs []Package) error {
report, err := collectBinarySize(path, pkgs, level)
if err != nil {
return err
}
switch format {
case "", "text":
printTextReport(os.Stdout, report)
case "json":
return emitJSONReport(os.Stdout, report)
default:
return fmt.Errorf("unknown size format %q (valid: text,json)", format)
}
return nil
}
func collectBinarySize(path string, pkgs []Package, level string) (*sizeReport, error) {
cmd, err := llvm.New("").Readelf("--elf-output-style=LLVM", "--all", path)
if err != nil {
return nil, fmt.Errorf("llvm-readelf: %w", err)
}
var stderr bytes.Buffer
cmd.Stderr = &stderr
stdout, err := cmd.StdoutPipe()
if err != nil {
return nil, fmt.Errorf("llvm-readelf stdout: %w", err)
}
if err := cmd.Start(); err != nil {
return nil, fmt.Errorf("failed to execute llvm-readelf: %w", err)
}
parsed, parseErr := parseReadelfOutput(stdout)
closeErr := stdout.Close()
waitErr := cmd.Wait()
if parseErr != nil {
if waitErr != nil {
return nil, fmt.Errorf("llvm-readelf failed: %w\n%s", waitErr, stderr.String())
}
return nil, parseErr
}
if closeErr != nil {
return nil, closeErr
}
if waitErr != nil {
return nil, fmt.Errorf("llvm-readelf failed: %w\n%s", waitErr, stderr.String())
}
report := buildSizeReport(path, parsed, pkgs, level)
if report == nil || len(report.Modules) == 0 {
return nil, fmt.Errorf("size report: no allocatable sections found in %s", path)
}
return report, nil
}
func parseReadelfOutput(r io.Reader) (*readelfData, error) {
scanner := bufio.NewScanner(r)
scanner.Buffer(make([]byte, 0, readelfInitialBuffer), readelfMaxBuffer)
type ctxKind int
const (
ctxRoot ctxKind = iota
ctxSections
ctxSection
ctxSymbols
ctxSymbol
)
type ctx struct {
kind ctxKind
indent int
}
stack := []ctx{{kind: ctxRoot, indent: -1}}
push := func(kind ctxKind, indent int) {
stack = append(stack, ctx{kind: kind, indent: indent})
}
pop := func(expected ctxKind, indent int) bool {
top := stack[len(stack)-1]
if top.kind != expected || top.indent != indent {
return false
}
stack = stack[:len(stack)-1]
return true
}
current := func() ctx {
return stack[len(stack)-1]
}
data := &readelfData{
sections: make(map[int]*sectionInfo),
symbols: make(map[int][]symbolInfo),
}
// readelf outputs section references differently:
// - Mach-O: section numbers are 1-based in symbol references
// - ELF: section numbers in symbol references match the Index directly
secIndexBase := 1 // default to Mach-O behavior; switch to 0 for ELF once detected
var currentSection *sectionInfo
var currentSymbol *symbolInfo
for scanner.Scan() {
raw := scanner.Text()
trimmed := strings.TrimSpace(raw)
if trimmed == "" {
continue
}
// Detect object format early to adjust section index base
if strings.HasPrefix(trimmed, "Format:") {
lower := strings.ToLower(trimmed)
if strings.Contains(lower, "mach-o") {
secIndexBase = 1
} else if strings.Contains(lower, "elf") {
secIndexBase = 0
}
}
indent := countLeadingSpaces(raw)
top := current()
switch {
case strings.HasPrefix(trimmed, "Sections [") && top.kind == ctxRoot:
push(ctxSections, indent)
continue
case strings.HasPrefix(trimmed, "Symbols [") && top.kind == ctxRoot:
push(ctxSymbols, indent)
continue
case trimmed == "Section {" && top.kind == ctxSections && indent == top.indent+2:
currentSection = &sectionInfo{Index: -1}
push(ctxSection, indent)
continue
case trimmed == "Symbol {" && top.kind == ctxSymbols && indent == top.indent+2:
currentSymbol = &symbolInfo{SectionIndex: -1}
push(ctxSymbol, indent)
continue
case trimmed == "}" && pop(ctxSection, indent):
if currentSection != nil && currentSection.Index >= 0 {
currentSection.Kind = classifySection(currentSection.Name, currentSection.Segment)
data.sections[currentSection.Index] = currentSection
}
currentSection = nil
continue
case trimmed == "}" && pop(ctxSymbol, indent):
if currentSymbol != nil && currentSymbol.SectionIndex >= 0 {
data.symbols[currentSymbol.SectionIndex] = append(data.symbols[currentSymbol.SectionIndex], *currentSymbol)
}
currentSymbol = nil
continue
case trimmed == "]" && (top.kind == ctxSections || top.kind == ctxSymbols) && indent == top.indent:
stack = stack[:len(stack)-1]
continue
}
switch top.kind {
case ctxSection:
if currentSection == nil {
continue
}
switch {
case strings.HasPrefix(trimmed, "Index: "):
if idx, err := strconv.Atoi(strings.TrimSpace(trimmed[len("Index: "):])); err == nil {
currentSection.Index = idx
}
case strings.HasPrefix(trimmed, "Name: "):
currentSection.Name = parseNameField(trimmed[len("Name: "):])
case strings.HasPrefix(trimmed, "Segment: "):
currentSection.Segment = parseNameField(trimmed[len("Segment: "):])
case strings.HasPrefix(trimmed, "Address: "):
if val, err := parseUintField(trimmed[len("Address: "):]); err == nil {
currentSection.Address = val
}
case strings.HasPrefix(trimmed, "Size: "):
if val, err := parseUintField(trimmed[len("Size: "):]); err == nil {
currentSection.Size = val
}
}
case ctxSymbol:
if currentSymbol == nil {
continue
}
switch {
case strings.HasPrefix(trimmed, "Name: "):
currentSymbol.Name = parseNameField(trimmed[len("Name: "):])
case strings.HasPrefix(trimmed, "Section: "):
name, idx := parseSectionRef(trimmed[len("Section: "):], secIndexBase)
currentSymbol.SectionIndex = idx
if currentSymbol.Name == "" {
currentSymbol.Name = name
}
case strings.HasPrefix(trimmed, "Value: "):
if val, err := parseUintField(trimmed[len("Value: "):]); err == nil {
currentSymbol.Address = val
}
}
}
}
if err := scanner.Err(); err != nil {
return nil, err
}
return data, nil
}
func countLeadingSpaces(line string) int {
count := 0
for _, ch := range line {
if ch != ' ' {
break
}
count++
}
return count
}
func classifySection(name, segment string) sectionKind {
ln := strings.ToLower(name)
ls := strings.ToLower(segment)
switch {
case strings.Contains(ln, "text"), strings.Contains(ln, "code"), strings.Contains(ln, "plt"):
return sectionText
case strings.Contains(ln, "rodata"), strings.Contains(ln, "const"), strings.Contains(ln, "literal"), strings.Contains(ln, "cstring"):
return sectionROData
case strings.Contains(ln, "bss"), strings.Contains(ln, "tbss"), strings.Contains(ln, "sbss"), strings.Contains(ln, "common"), strings.Contains(ln, "zerofill"):
return sectionBSS
case strings.Contains(ln, "data"), strings.Contains(ln, "got"), strings.Contains(ln, "init_array"), strings.Contains(ln, "cfstring"), strings.Contains(ln, "tdata"):
return sectionData
}
switch {
case strings.Contains(ls, "__text"):
return sectionText
case strings.Contains(ls, "data_const"):
return sectionROData
case strings.Contains(ls, "__data"):
return sectionData
}
return sectionUnknown
}
func buildSizeReport(path string, data *readelfData, pkgs []Package, level string) *sizeReport {
report := &sizeReport{Binary: path, Modules: make(map[string]*moduleSize)}
if data == nil {
return report
}
res := newNameResolver(level, pkgs)
var recognized bool
for idx, sec := range data.sections {
if sec == nil || sec.Size == 0 {
continue
}
if sec.Kind == sectionUnknown {
continue
}
recognized = true
end := sec.Address + sec.Size
syms := data.symbols[idx]
if len(syms) == 0 {
report.add("(unknown "+sec.Name+")", sec.Kind, sec.Size)
continue
}
// Sort symbols by address to calculate sizes based on address ranges
sort.Slice(syms, func(i, j int) bool {
if syms[i].Address == syms[j].Address {
return syms[i].Name < syms[j].Name
}
return syms[i].Address < syms[j].Address
})
cursor := sec.Address
for i := 0; i < len(syms); i++ {
sym := syms[i]
// Skip symbols that are beyond the section bounds
if sym.Address >= end {
continue
}
addr := sym.Address
// Clamp symbol address to section start if it's before the section
if addr < sec.Address {
addr = sec.Address
}
// Add padding bytes between cursor and current symbol
if addr > cursor {
report.add("(padding "+sec.Name+")", sec.Kind, addr-cursor)
cursor = addr
}
// Find the next symbol address to calculate this symbol's size.
// Symbols at the same address are handled by taking the next different address.
next := end
for j := i + 1; j < len(syms); j++ {
if syms[j].Address > addr {
next = syms[j].Address
break
}
}
if next > end {
next = end
}
// Skip symbols with zero size
if next <= addr {
continue
}
// Attribute the address range [addr, next) to the symbol's module
mod := res.resolve(sym.Name)
report.add(mod, sec.Kind, next-addr)
cursor = next
}
// Add any remaining padding at the end of the section
if cursor < end {
report.add("(padding "+sec.Name+")", sec.Kind, end-cursor)
}
}
if !recognized {
return nil
}
return report
}
func emitJSONReport(w io.Writer, report *sizeReport) error {
type moduleJSON struct {
Name string `json:"name"`
Code uint64 `json:"code"`
ROData uint64 `json:"rodata"`
Data uint64 `json:"data"`
BSS uint64 `json:"bss"`
Flash uint64 `json:"flash"`
RAM uint64 `json:"ram"`
}
mods := report.sortedModules()
jsonMods := make([]moduleJSON, 0, len(mods))
for _, m := range mods {
jsonMods = append(jsonMods, moduleJSON{
Name: m.Name,
Code: m.Code,
ROData: m.ROData,
Data: m.Data,
BSS: m.BSS,
Flash: m.Flash(),
RAM: m.RAM(),
})
}
payload := struct {
Binary string `json:"binary"`
Modules []moduleJSON `json:"modules"`
Total moduleJSON `json:"total"`
}{
Binary: filepath.Clean(report.Binary),
Modules: jsonMods,
Total: moduleJSON{
Name: "total",
Code: report.Total.Code,
ROData: report.Total.ROData,
Data: report.Total.Data,
BSS: report.Total.BSS,
Flash: report.Total.Flash(),
RAM: report.Total.RAM(),
},
}
enc := json.NewEncoder(w)
enc.SetIndent("", " ")
return enc.Encode(payload)
}
func printTextReport(w io.Writer, report *sizeReport) {
fmt.Fprintf(w, "\nSize report for %s\n", filepath.Clean(report.Binary))
fmt.Fprintln(w, " code rodata data bss | flash ram | module")
fmt.Fprintln(w, "------------------------------- | --------------- | ----------------")
for _, m := range report.sortedModules() {
fmt.Fprintf(w, "%7d %7d %7d %7d | %7d %7d | %s\n", m.Code, m.ROData, m.Data, m.BSS, m.Flash(), m.RAM(), m.Name)
}
fmt.Fprintln(w, "------------------------------- | --------------- | ----------------")
fmt.Fprintf(w, "%7d %7d %7d %7d | %7d %7d | total\n", report.Total.Code, report.Total.ROData, report.Total.Data, report.Total.BSS, report.Total.Flash(), report.Total.RAM())
}
func (r *sizeReport) sortedModules() []*moduleSize {
mods := make([]*moduleSize, 0, len(r.Modules))
for _, m := range r.Modules {
mods = append(mods, m)
}
sort.Slice(mods, func(i, j int) bool {
if mods[i].Flash() == mods[j].Flash() {
return mods[i].Name < mods[j].Name
}
return mods[i].Flash() > mods[j].Flash()
})
return mods
}
// moduleNameFromSymbol extracts the Go package name from a symbol name.
// It handles various symbol naming conventions:
// - C symbols: Strip leading underscore (e.g., "_main" -> "main")
// - Assembler symbols: Strip leading dot (e.g., ".text" -> "text")
// - Versioned symbols: Remove version suffix (e.g., "symbol@@GLIBC_2.2.5" -> "symbol")
// - Go symbols: Extract package from "package.symbol" format
// - Generic types: Strip type parameters (e.g., "pkg(T)" -> "pkg")
func moduleNameFromSymbol(raw string) string {
name := strings.TrimSpace(raw)
// Strip C symbol prefix
name = strings.TrimPrefix(name, "_")
// Strip assembler symbol prefix
name = strings.TrimPrefix(name, ".")
if name == "" {
return "(anonymous)"
}
// Remove trailing attributes (e.g., "symbol (weak)")
if idx := strings.Index(name, " "); idx > 0 {
name = name[:idx]
}
// Remove version suffix for versioned symbols (e.g., "symbol@@GLIBC_2.2.5")
if idx := strings.Index(name, "@"); idx > 0 {
name = name[:idx]
}
// Extract Go package name from "package.symbol" format
lastDot := strings.LastIndex(name, ".")
if lastDot > 0 {
pkg := name[:lastDot]
// Strip generic type parameters (e.g., "slices.Sort[int]" -> "slices")
if paren := strings.Index(pkg, "("); paren > 0 {
pkg = pkg[:paren]
}
pkg = strings.Trim(pkg, " ")
if pkg != "" {
return pkg
}
}
return name
}
func parseNameField(field string) string {
val := strings.TrimSpace(field)
if idx := strings.Index(val, "("); idx >= 0 {
val = strings.TrimSpace(val[:idx])
}
return val
}
func parseSectionRef(field string, indexBase int) (string, int) {
name := parseNameField(field)
idx := strings.Index(field, "(")
if idx < 0 {
return name, -1
}
end := strings.Index(field[idx:], ")")
if end < 0 {
return name, -1
}
val := strings.TrimSpace(field[idx+1 : idx+end])
val = strings.TrimPrefix(val, "0x")
if val == "" {
return name, -1
}
num, err := strconv.ParseUint(val, 16, 64)
if err != nil {
return name, -1
}
if num == 0 {
return name, -1
}
if indexBase == 0 && num >= 0xFFF0 {
// Special ELF indices such as SHN_ABS/SHN_COMMON.
return name, -1
}
if num > math.MaxInt {
return name, -1
}
res := int(num) - indexBase
if res < 0 {
return name, -1
}
return name, res
}
func parseUintField(field string) (uint64, error) {
val := strings.TrimSpace(field)
if strings.HasPrefix(val, "0x") || strings.HasPrefix(val, "0X") {
return strconv.ParseUint(val[2:], 16, 64)
}
return strconv.ParseUint(val, 10, 64)
}
func ensureSizeReporting(conf *Config) error {
if !conf.SizeReport {
return nil
}
switch strings.ToLower(conf.SizeLevel) {
case "", "module":
conf.SizeLevel = "module"
case "package", "full":
conf.SizeLevel = strings.ToLower(conf.SizeLevel)
default:
return fmt.Errorf("invalid size level %q (valid: full,module,package)", conf.SizeLevel)
}
cmd, err := llvm.New("").Readelf("--version")
if err != nil {
return fmt.Errorf("llvm-readelf not available: %w", err)
}
cmd.Stdout = io.Discard
cmd.Stderr = io.Discard
if err := cmd.Run(); err != nil {
return fmt.Errorf("llvm-readelf not available: %w", err)
}
return nil
}