Initial commit: Go 1.23 release state

This commit is contained in:
Vorapol Rinsatitnon
2024-09-21 23:49:08 +10:00
commit 17cd57a668
13231 changed files with 3114330 additions and 0 deletions

View File

@@ -0,0 +1,27 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package amd64
import (
"cmd/compile/internal/ssagen"
"cmd/internal/obj/x86"
)
// leaptr is the load-effective-address opcode used for materializing
// stack addresses (64-bit LEAQ on amd64).
var leaptr = x86.ALEAQ

// Init populates arch with the amd64-specific hooks that the
// architecture-independent SSA back end (ssagen) invokes during
// code generation.
func Init(arch *ssagen.ArchInfo) {
	arch.LinkArch = &x86.Linkamd64
	arch.REGSP = x86.REGSP
	// Effectively unbounded object size: offsets up to 1<<50 are addressable.
	arch.MAXWIDTH = 1 << 50

	// Frame zeroing and nop emission.
	arch.ZeroRange = zerorange
	arch.Ginsnop = ginsnop

	// SSA lowering callbacks.
	arch.SSAMarkMoves = ssaMarkMoves
	arch.SSAGenValue = ssaGenValue
	arch.SSAGenBlock = ssaGenBlock
	arch.LoadRegResult = loadRegResult
	arch.SpillArgReg = spillArgReg
}

View File

@@ -0,0 +1,135 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package amd64
import (
"cmd/compile/internal/ir"
"cmd/compile/internal/objw"
"cmd/compile/internal/types"
"cmd/internal/obj"
"cmd/internal/obj/x86"
"internal/buildcfg"
)
// no floating point in note handlers on Plan 9, so zerorange below must
// avoid its SSE (X15/MOVUPS) fast paths when targeting that OS.
var isPlan9 = buildcfg.GOOS == "plan9"
// DUFFZERO is built from dzBlocks repetitions of a block containing
// dzBlockLen MOVUPS stores followed by one LEAQ adjustment.
// See runtime/mkduff.go for the generator.
const (
	dzBlocks    = 16 // number of MOV/ADD blocks
	dzBlockLen  = 4  // number of clears per block
	dzBlockSize = 23 // size of instructions in a single block
	dzMovSize   = 5  // size of single MOV instruction w/ offset
	dzLeaqSize  = 4  // size of single LEAQ instruction
	dzClearStep = 16 // number of bytes cleared by each MOV instruction

	dzClearLen = dzClearStep * dzBlockLen // bytes cleared by one block
	dzSize     = dzBlocks * dzBlockSize
)

// dzOff returns the offset for a jump into DUFFZERO such that the
// remaining instructions zero exactly b bytes.
func dzOff(b int64) int64 {
	// Skip the whole blocks that are not needed...
	off := int64(dzSize) - b/dzClearLen*dzBlockSize
	// ...and, for a partial block, skip past its unused leading
	// LEAQ and MOV instructions as well.
	if tail := b % dzClearLen; tail >= dzClearStep {
		off -= dzLeaqSize + dzMovSize*(tail/dzClearStep)
	}
	return off
}

// dzDI returns the pre-adjustment to DI for a call to DUFFZERO.
// b is the number of bytes to zero.
func dzDI(b int64) int64 {
	tail := b % dzClearLen
	if tail < dzClearStep {
		return 0
	}
	// Jumping into the middle of a block skips some of its clears;
	// bias DI backward so the executed clears land on the target.
	return -dzClearStep * (dzBlockLen - tail/dzClearStep)
}
// zerorange zeroes cnt bytes of stack memory starting at offset off
// from SP, appending instructions after p and returning the last
// instruction emitted. state tracks what the prolog knows about
// scratch registers across calls (only the r13 bit is defined here).
//
// NOTE(review): the 8-byte and MOVUPS paths store from X15, which is
// assumed to hold zero under the amd64 register ABI — confirm against
// cmd/compile's internal ABI documentation.
func zerorange(pp *objw.Progs, p *obj.Prog, off, cnt int64, state *uint32) *obj.Prog {
	const (
		r13 = 1 << iota // if R13 is already zeroed.
	)

	// Nothing to zero.
	if cnt == 0 {
		return p
	}

	if cnt == 8 {
		// Exactly one word: a single 8-byte store from X15.
		p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_X15, 0, obj.TYPE_MEM, x86.REG_SP, off)
	} else if !isPlan9 && cnt <= int64(8*types.RegSize) {
		// Small range: unrolled 16-byte SSE stores (disallowed on
		// Plan 9, where note handlers may not use floating point).
		for i := int64(0); i < cnt/16; i++ {
			p = pp.Append(p, x86.AMOVUPS, obj.TYPE_REG, x86.REG_X15, 0, obj.TYPE_MEM, x86.REG_SP, off+i*16)
		}
		// Cover any sub-16-byte tail with an overlapping 16-byte store.
		if cnt%16 != 0 {
			p = pp.Append(p, x86.AMOVUPS, obj.TYPE_REG, x86.REG_X15, 0, obj.TYPE_MEM, x86.REG_SP, off+cnt-int64(16))
		}
	} else if !isPlan9 && (cnt <= int64(128*types.RegSize)) {
		// Medium range: jump into the runtime's DUFFZERO routine.
		// Save DI to r12. With the amd64 Go register abi, DI can contain
		// an incoming parameter, whereas R12 is always scratch.
		p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_DI, 0, obj.TYPE_REG, x86.REG_R12, 0)
		// Emit duffzero call: point DI at the region (pre-biased by
		// dzDI for partial-block entry) and jump to dzOff(cnt).
		p = pp.Append(p, leaptr, obj.TYPE_MEM, x86.REG_SP, off+dzDI(cnt), obj.TYPE_REG, x86.REG_DI, 0)
		p = pp.Append(p, obj.ADUFFZERO, obj.TYPE_NONE, 0, 0, obj.TYPE_ADDR, 0, dzOff(cnt))
		p.To.Sym = ir.Syms.Duffzero
		// Overlapping 16-byte store for the sub-16-byte tail
		// (presumably DI points just past the zeroed region here —
		// verify against runtime/mkduff.go).
		if cnt%16 != 0 {
			p = pp.Append(p, x86.AMOVUPS, obj.TYPE_REG, x86.REG_X15, 0, obj.TYPE_MEM, x86.REG_DI, -int64(8))
		}
		// Restore DI from r12
		p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_R12, 0, obj.TYPE_REG, x86.REG_DI, 0)
	} else {
		// Large range (or Plan 9): REP STOSQ.
		// When the register ABI is in effect, at this point in the
		// prolog we may have live values in all of RAX,RDI,RCX. Save
		// them off to registers before the REPSTOSQ below, then
		// restore. Note that R12 and R13 are always available as
		// scratch regs; here we also use R15 (this is safe to do
		// since there won't be any globals accessed in the prolog).
		// See rewriteToUseGot() in obj6.go for more on r15 use.

		// Save rax/rdi/rcx
		p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_DI, 0, obj.TYPE_REG, x86.REG_R12, 0)
		p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_AX, 0, obj.TYPE_REG, x86.REG_R13, 0)
		p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_CX, 0, obj.TYPE_REG, x86.REG_R15, 0)

		// Set up the REPSTOSQ and kick it off.
		p = pp.Append(p, x86.AXORL, obj.TYPE_REG, x86.REG_AX, 0, obj.TYPE_REG, x86.REG_AX, 0)
		p = pp.Append(p, x86.AMOVQ, obj.TYPE_CONST, 0, cnt/int64(types.RegSize), obj.TYPE_REG, x86.REG_CX, 0)
		p = pp.Append(p, leaptr, obj.TYPE_MEM, x86.REG_SP, off, obj.TYPE_REG, x86.REG_DI, 0)
		p = pp.Append(p, x86.AREP, obj.TYPE_NONE, 0, 0, obj.TYPE_NONE, 0, 0)
		p = pp.Append(p, x86.ASTOSQ, obj.TYPE_NONE, 0, 0, obj.TYPE_NONE, 0, 0)

		// Restore rax/rdi/rcx
		p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_R12, 0, obj.TYPE_REG, x86.REG_DI, 0)
		p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_R13, 0, obj.TYPE_REG, x86.REG_AX, 0)
		p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_R15, 0, obj.TYPE_REG, x86.REG_CX, 0)

		// Record the fact that r13 is no longer zero.
		*state &= ^uint32(r13)
	}

	return p
}
// ginsnop appends a no-op instruction to pp and returns it.
//
// The nop is described as XCHGL AX, AX, which the assembler encodes
// as the 1-byte hardware nop (0x90). Unlike a typical *L opcode it
// does not zero the high 32 bits of RAX.
// (gas assembles "xchg %eax,%eax" to 0x87 0xc0, which
// does zero the high 32 bits.)
func ginsnop(pp *objw.Progs) *obj.Prog {
	nop := pp.Prog(x86.AXCHGL)
	nop.From.Type = obj.TYPE_REG
	nop.To.Type = obj.TYPE_REG
	nop.From.Reg = x86.REG_AX
	nop.To.Reg = x86.REG_AX
	return nop
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,433 @@
// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// When using GOEXPERIMENT=boringcrypto, the test program links in the boringcrypto syso,
// which does not respect GOAMD64, so we skip the test if boringcrypto is enabled.
//go:build !boringcrypto
package amd64_test
import (
"bufio"
"debug/elf"
"debug/macho"
"errors"
"fmt"
"go/build"
"internal/testenv"
"io"
"math"
"math/bits"
"os"
"os/exec"
"regexp"
"runtime"
"strconv"
"strings"
"testing"
)
// Test to make sure that when building for GOAMD64=v1, we don't
// use any >v1 instructions.
//
// Strategy: copy the currently running test binary, overwrite every
// >v1 instruction in the copy with a faulting byte, then re-run the
// copy; if the copy still prints PASS, none of those instructions
// were executed.
func TestGoAMD64v1(t *testing.T) {
	if runtime.GOARCH != "amd64" {
		t.Skip("amd64-only test")
	}
	if runtime.GOOS != "linux" && runtime.GOOS != "darwin" {
		t.Skip("test only works on elf or macho platforms")
	}
	for _, tag := range build.Default.ToolTags {
		if tag == "amd64.v2" {
			t.Skip("compiling for GOAMD64=v2 or higher")
		}
	}
	// TESTGOAMD64V1 is set when this test re-executes itself below;
	// don't recurse.
	if os.Getenv("TESTGOAMD64V1") != "" {
		t.Skip("recursive call")
	}
	// Make a binary which will be a modified version of the
	// currently running binary.
	dst, err := os.CreateTemp("", "TestGoAMD64v1")
	if err != nil {
		t.Fatalf("failed to create temp file: %v", err)
	}
	defer os.Remove(dst.Name())
	dst.Chmod(0500) // make executable
	// Clobber all the non-v1 opcodes. Also collect GODEBUG settings
	// that turn off the runtime's own detection of each feature.
	opcodes := map[string]bool{}
	var features []string
	for feature, opcodeList := range featureToOpcodes {
		if runtimeFeatures[feature] {
			features = append(features, fmt.Sprintf("cpu.%s=off", feature))
		}
		for _, op := range opcodeList {
			opcodes[op] = true
		}
	}
	clobber(t, os.Args[0], dst, opcodes)
	if err = dst.Close(); err != nil {
		t.Fatalf("can't close binary: %v", err)
	}
	// Run the resulting binary with runtime feature detection disabled,
	// so library code won't take >v1 code paths either.
	cmd := testenv.Command(t, dst.Name())
	testenv.CleanCmdEnv(cmd)
	cmd.Env = append(cmd.Env, "TESTGOAMD64V1=yes")
	cmd.Env = append(cmd.Env, fmt.Sprintf("GODEBUG=%s", strings.Join(features, ",")))
	out, err := cmd.CombinedOutput()
	if err != nil {
		t.Fatalf("couldn't execute test: %s", err)
	}
	// Expect to see output of the form "PASS\n", unless the test binary
	// was compiled for coverage (in which case there will be an extra line).
	success := false
	lines := strings.Split(string(out), "\n")
	if len(lines) == 2 {
		success = lines[0] == "PASS" && lines[1] == ""
	} else if len(lines) == 3 {
		success = lines[0] == "PASS" &&
			strings.HasPrefix(lines[1], "coverage") && lines[2] == ""
	}
	if !success {
		t.Fatalf("test reported error: %s lines=%+v", string(out), lines)
	}
}
// Clobber copies the binary src to dst, replacing all the instructions in opcodes with
// faulting instructions. The replacement byte is INT3 (0xcc), so executing
// any clobbered instruction kills the process immediately.
func clobber(t *testing.T, src string, dst *os.File, opcodes map[string]bool) {
	// Run objdump to get disassembly.
	var re *regexp.Regexp
	var disasm io.Reader
	if false {
		// TODO: go tool objdump doesn't disassemble the bmi1 instructions
		// in question correctly. See issue 48584.
		cmd := testenv.Command(t, "go", "tool", "objdump", src)
		var err error
		disasm, err = cmd.StdoutPipe()
		if err != nil {
			t.Fatal(err)
		}
		if err := cmd.Start(); err != nil {
			t.Fatal(err)
		}
		t.Cleanup(func() {
			if err := cmd.Wait(); err != nil {
				t.Error(err)
			}
		})
		re = regexp.MustCompile(`^[^:]*:[-\d]+\s+0x([\da-f]+)\s+([\da-f]+)\s+([A-Z]+)`)
	} else {
		// TODO: we're depending on platform-native objdump here. Hence the Skipf
		// below if it doesn't run for some reason.
		cmd := testenv.Command(t, "objdump", "-d", src)
		var err error
		disasm, err = cmd.StdoutPipe()
		if err != nil {
			t.Fatal(err)
		}
		if err := cmd.Start(); err != nil {
			if errors.Is(err, exec.ErrNotFound) {
				t.Skipf("can't run test due to missing objdump: %s", err)
			}
			t.Fatal(err)
		}
		t.Cleanup(func() {
			if err := cmd.Wait(); err != nil {
				t.Error(err)
			}
		})
		// Capture groups: 1 = hex virtual address, 2 = encoding bytes,
		// 3 = mnemonic.
		re = regexp.MustCompile(`^\s*([\da-f]+):\s*((?:[\da-f][\da-f] )+)\s*([a-z\d]+)`)
	}
	// Find all the instruction addresses we need to edit.
	virtualEdits := map[uint64]bool{}
	scanner := bufio.NewScanner(disasm)
	for scanner.Scan() {
		line := scanner.Text()
		parts := re.FindStringSubmatch(line)
		if len(parts) == 0 {
			continue
		}
		addr, err := strconv.ParseUint(parts[1], 16, 64)
		if err != nil {
			continue // not a hex address
		}
		opcode := strings.ToLower(parts[3])
		if !opcodes[opcode] {
			continue
		}
		t.Logf("clobbering instruction %s", line)
		n := (len(parts[2]) - strings.Count(parts[2], " ")) / 2 // number of bytes in instruction encoding
		for i := 0; i < n; i++ {
			// Only really need to make the first byte faulting, but might
			// as well make all the bytes faulting.
			virtualEdits[addr+uint64(i)] = true
		}
	}
	// Figure out where in the binary the edits must be done:
	// translate virtual addresses to file offsets via the section tables.
	physicalEdits := map[uint64]bool{}
	if e, err := elf.Open(src); err == nil {
		for _, sec := range e.Sections {
			vaddr := sec.Addr
			paddr := sec.Offset
			size := sec.Size
			for a := range virtualEdits {
				if a >= vaddr && a < vaddr+size {
					physicalEdits[paddr+(a-vaddr)] = true
				}
			}
		}
	} else if m, err2 := macho.Open(src); err2 == nil {
		for _, sec := range m.Sections {
			vaddr := sec.Addr
			paddr := uint64(sec.Offset)
			size := sec.Size
			for a := range virtualEdits {
				if a >= vaddr && a < vaddr+size {
					physicalEdits[paddr+(a-vaddr)] = true
				}
			}
		}
	} else {
		t.Log(err)
		t.Log(err2)
		t.Fatal("executable format not elf or macho")
	}
	// Every virtual edit must have landed in some section.
	if len(virtualEdits) != len(physicalEdits) {
		t.Fatal("couldn't find an instruction in text sections")
	}
	// Copy source to destination, making edits along the way.
	f, err := os.Open(src)
	if err != nil {
		t.Fatal(err)
	}
	r := bufio.NewReader(f)
	w := bufio.NewWriter(dst)
	a := uint64(0) // current file offset
	done := 0      // number of edits applied so far
	for {
		b, err := r.ReadByte()
		if err == io.EOF {
			break
		}
		if err != nil {
			t.Fatal("can't read")
		}
		if physicalEdits[a] {
			b = 0xcc // INT3 opcode
			done++
		}
		err = w.WriteByte(b)
		if err != nil {
			t.Fatal("can't write")
		}
		a++
	}
	if done != len(physicalEdits) {
		t.Fatal("physical edits remaining")
	}
	w.Flush()
	f.Close()
}
// setOf returns a set containing exactly the given keys.
func setOf(keys ...string) map[string]bool {
	set := make(map[string]bool, len(keys))
	for _, k := range keys {
		set[k] = true
	}
	return set
}
// runtimeFeatures lists the CPU features that can be switched off via
// GODEBUG=cpu.<feature>=off; TestGoAMD64v1 disables each of these that
// also appears in featureToOpcodes when re-running the test binary.
var runtimeFeatures = setOf(
	"adx", "aes", "avx", "avx2", "bmi1", "bmi2", "erms", "fma",
	"pclmulqdq", "popcnt", "rdtscp", "sse3", "sse41", "sse42", "ssse3",
)
// featureToOpcodes maps a CPU-feature name to the disassembler
// mnemonics of instructions requiring that feature; clobber overwrites
// every listed mnemonic in the test binary.
var featureToOpcodes = map[string][]string{
	// Note: we include *q, *l, and plain opcodes here.
	// go tool objdump doesn't include a [QL] on popcnt instructions, until CL 351889
	// native objdump doesn't include [QL] on linux.
	"popcnt": {"popcntq", "popcntl", "popcnt"},
	"bmi1": {
		"andnq", "andnl", "andn",
		"blsiq", "blsil", "blsi",
		"blsmskq", "blsmskl", "blsmsk",
		"blsrq", "blsrl", "blsr",
		"tzcntq", "tzcntl", "tzcnt",
	},
	"bmi2": {
		"sarxq", "sarxl", "sarx",
		"shlxq", "shlxl", "shlx",
		"shrxq", "shrxl", "shrx",
	},
	"sse41": {
		"roundsd",
		"pinsrq", "pinsrl", "pinsrd", "pinsrb", "pinsr",
		"pextrq", "pextrl", "pextrd", "pextrb", "pextr",
		"pminsb", "pminsd", "pminuw", "pminud", // Note: ub and sw are ok.
		"pmaxsb", "pmaxsd", "pmaxuw", "pmaxud",
		"pmovzxbw", "pmovzxbd", "pmovzxbq", "pmovzxwd", "pmovzxwq", "pmovzxdq",
		"pmovsxbw", "pmovsxbd", "pmovsxbq", "pmovsxwd", "pmovsxwq", "pmovsxdq",
		"pblendvb",
	},
	"fma":   {"vfmadd231sd"},
	"movbe": {"movbeqq", "movbeq", "movbell", "movbel", "movbe"},
	"lzcnt": {"lzcntq", "lzcntl", "lzcnt"},
}
// TestPopCnt exercises bits.OnesCount{32,64}, which use the POPCNT
// instruction when available.
func TestPopCnt(t *testing.T) {
	cases := []struct {
		in  uint64
		out int
	}{
		{0b00001111, 4},
		{0b00001110, 3},
		{0b00001100, 2},
		{0b00000000, 0},
	}
	for _, c := range cases {
		if got := bits.OnesCount64(c.in); got != c.out {
			t.Errorf("OnesCount64(%#x) = %d, want %d", c.in, got, c.out)
		}
		if got := bits.OnesCount32(uint32(c.in)); got != c.out {
			t.Errorf("OnesCount32(%#x) = %d, want %d", c.in, got, c.out)
		}
	}
}
// TestAndNot exercises x &^ y, which uses the ANDN instruction when
// BMI1 is available.
func TestAndNot(t *testing.T) {
	cases := []struct{ a, b, out uint64 }{
		{0b00001111, 0b00000011, 0b1100},
		{0b00001111, 0b00001100, 0b0011},
		{0b00000000, 0b00000000, 0b0000},
	}
	for _, c := range cases {
		if got := c.a &^ c.b; got != c.out {
			t.Errorf("%#x &^ %#x = %#x, want %#x", c.a, c.b, got, c.out)
		}
		if got := uint32(c.a) &^ uint32(c.b); got != uint32(c.out) {
			t.Errorf("%#x &^ %#x = %#x, want %#x", c.a, c.b, got, c.out)
		}
	}
}
// TestBLSI exercises x & -x (isolate lowest set bit), which uses the
// BLSI instruction when BMI1 is available.
func TestBLSI(t *testing.T) {
	cases := []struct{ in, out uint64 }{
		{0b00001111, 0b001},
		{0b00001110, 0b010},
		{0b00001100, 0b100},
		{0b11000110, 0b010},
		{0b00000000, 0b000},
	}
	for _, c := range cases {
		if got := c.in & -c.in; got != c.out {
			t.Errorf("%#x & (-%#x) = %#x, want %#x", c.in, c.in, got, c.out)
		}
		if got := uint32(c.in) & -uint32(c.in); got != uint32(c.out) {
			t.Errorf("%#x & (-%#x) = %#x, want %#x", c.in, c.in, got, c.out)
		}
	}
}
// TestBLSMSK exercises x ^ (x-1) (mask through lowest set bit), which
// uses the BLSMSK instruction when BMI1 is available.
func TestBLSMSK(t *testing.T) {
	cases := []struct{ in, out uint64 }{
		{0b00001111, 0b001},
		{0b00001110, 0b011},
		{0b00001100, 0b111},
		{0b11000110, 0b011},
		{0b00000000, 1<<64 - 1}, // x == 0 yields an all-ones mask
	}
	for _, c := range cases {
		if got := c.in ^ (c.in - 1); got != c.out {
			t.Errorf("%#x ^ (%#x-1) = %#x, want %#x", c.in, c.in, got, c.out)
		}
		if got := uint32(c.in) ^ (uint32(c.in) - 1); got != uint32(c.out) {
			t.Errorf("%#x ^ (%#x-1) = %#x, want %#x", c.in, c.in, got, uint32(c.out))
		}
	}
}
// TestBLSR exercises x & (x-1) (clear lowest set bit), which uses the
// BLSR instruction when BMI1 is available.
func TestBLSR(t *testing.T) {
	cases := []struct{ in, out uint64 }{
		{0b00001111, 0b00001110},
		{0b00001110, 0b00001100},
		{0b00001100, 0b00001000},
		{0b11000110, 0b11000100},
		{0b00000000, 0b00000000},
	}
	for _, c := range cases {
		if got := c.in & (c.in - 1); got != c.out {
			t.Errorf("%#x & (%#x-1) = %#x, want %#x", c.in, c.in, got, c.out)
		}
		if got := uint32(c.in) & (uint32(c.in) - 1); got != uint32(c.out) {
			t.Errorf("%#x & (%#x-1) = %#x, want %#x", c.in, c.in, got, c.out)
		}
	}
}
// TestTrailingZeros exercises bits.TrailingZeros{32,64}, which use the
// TZCNT instruction when BMI1 is available.
//
// Fix: the failure message for the 32-bit case previously said
// "TrailingZeros64" even though it reports bits.TrailingZeros32.
func TestTrailingZeros(t *testing.T) {
	for _, tt := range []struct {
		x    uint64
		want int
	}{
		{0b00001111, 0},
		{0b00001110, 1},
		{0b00001100, 2},
		{0b00001000, 3},
		{0b00000000, 64},
	} {
		if got := bits.TrailingZeros64(tt.x); got != tt.want {
			t.Errorf("TrailingZeros64(%#x) = %d, want %d", tt.x, got, tt.want)
		}
		// The 32-bit count of a zero input is 32, not 64.
		want := tt.want
		if want == 64 {
			want = 32
		}
		if got := bits.TrailingZeros32(uint32(tt.x)); got != want {
			t.Errorf("TrailingZeros32(%#x) = %d, want %d", tt.x, got, want)
		}
	}
}
// TestRound exercises math.RoundToEven (round half to even); the
// half-way cases 1.5 and 2.5 both round to 2.
func TestRound(t *testing.T) {
	cases := []struct{ in, out float64 }{
		{1.4, 1},
		{1.5, 2},
		{1.6, 2},
		{2.4, 2},
		{2.5, 2},
		{2.6, 3},
	}
	for _, c := range cases {
		if got := math.RoundToEven(c.in); got != c.out {
			t.Errorf("RoundToEven(%f) = %f, want %f", c.in, got, c.out)
		}
	}
}
// TestFMA exercises math.FMA (fused multiply-add, x*y+z), which maps
// to the fma feature's vfmadd231sd in featureToOpcodes.
func TestFMA(t *testing.T) {
	cases := []struct{ x, y, z, out float64 }{
		{2, 3, 4, 10},
		{3, 4, 5, 17},
	}
	for _, c := range cases {
		if got := math.FMA(c.x, c.y, c.z); got != c.out {
			t.Errorf("FMA(%f,%f,%f) = %f, want %f", c.x, c.y, c.z, got, c.out)
		}
	}
}