Update to go1.24.0

Author: Vorapol Rinsatitnon
Date:   2025-02-14 12:42:07 +07:00
Parent: 25e497e367
Commit: bf266cebe6

3169 changed files with 236789 additions and 60275 deletions

View File

@@ -235,7 +235,7 @@ There are three mechanisms for allocating unmanaged memory:
objects of the same type.
In general, types that are allocated using any of these should be
marked as not in heap by embedding `runtime/internal/sys.NotInHeap`.
marked as not in heap by embedding `internal/runtime/sys.NotInHeap`.
Objects that are allocated in unmanaged memory **must not** contain
heap pointers unless the following rules are also obeyed:
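As a quick illustration of the embedding rule described above, a not-in-heap type looks roughly like the sketch below (hypothetical type name; this only compiles inside the runtime tree, since the package is internal):

```go
package runtime

import "internal/runtime/sys"

// exampleUnmanaged is a hypothetical type that is only ever allocated from
// unmanaged memory (sysAlloc, persistentalloc, or fixalloc). Embedding
// sys.NotInHeap marks it as not-in-heap, so the compiler rejects heap
// allocation of it and omits write barriers for pointers to it.
type exampleUnmanaged struct {
	_    sys.NotInHeap
	next *exampleUnmanaged // fine: points to another not-in-heap object
}
```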

View File

@@ -8,6 +8,7 @@ import (
"internal/abi"
"internal/cpu"
"internal/goarch"
"internal/runtime/sys"
"unsafe"
)
@@ -34,7 +35,7 @@ func memhash128(p unsafe.Pointer, h uintptr) uintptr {
//go:nosplit
func memhash_varlen(p unsafe.Pointer, h uintptr) uintptr {
ptr := getclosureptr()
ptr := sys.GetClosurePtr()
size := *(*uintptr)(unsafe.Pointer(ptr + unsafe.Sizeof(h)))
return memhash(p, h, size)
}
@@ -56,8 +57,6 @@ var useAeshash bool
// - github.com/outcaste-io/ristretto
// - github.com/puzpuzpuz/xsync/v2
// - github.com/puzpuzpuz/xsync/v3
// - github.com/segmentio/parquet-go
// - github.com/parquet-go/parquet-go
// - github.com/authzed/spicedb
// - github.com/pingcap/badger
//
@@ -67,28 +66,8 @@ var useAeshash bool
//go:linkname memhash
func memhash(p unsafe.Pointer, h, s uintptr) uintptr
// memhash32 should be an internal detail,
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
// - github.com/segmentio/parquet-go
// - github.com/parquet-go/parquet-go
//
// Do not remove or change the type signature.
// See go.dev/issue/67401.
//
//go:linkname memhash32
func memhash32(p unsafe.Pointer, h uintptr) uintptr
// memhash64 should be an internal detail,
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
// - github.com/segmentio/parquet-go
// - github.com/parquet-go/parquet-go
//
// Do not remove or change the type signature.
// See go.dev/issue/67401.
//
//go:linkname memhash64
func memhash64(p unsafe.Pointer, h uintptr) uintptr
// strhash should be an internal detail,
@@ -97,7 +76,6 @@ func memhash64(p unsafe.Pointer, h uintptr) uintptr
// - github.com/aristanetworks/goarista
// - github.com/bytedance/sonic
// - github.com/bytedance/go-tagexpr/v2
// - github.com/cloudwego/frugal
// - github.com/cloudwego/dynamicgo
// - github.com/v2fly/v2ray-core/v5
//
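For illustration, the pull-style go:linkname pattern these hall-of-shame packages rely on looks roughly like the sketch below (hypothetical consumer package; typically an empty .s file is also needed so the compiler accepts the body-less declaration). This is exactly the dependency that go.dev/issue/67401 is trying to wind down.

```go
package fasthash

import "unsafe"

// memhash is pulled from the runtime via go:linkname; its signature must
// match runtime.memhash exactly, which is why the runtime cannot change it.
//
//go:linkname memhash runtime.memhash
//go:noescape
func memhash(p unsafe.Pointer, seed, size uintptr) uintptr

// Bytes hashes the slice b with the runtime's memhash (illustrative only).
func Bytes(b []byte, seed uintptr) uintptr {
	return memhash(unsafe.Pointer(unsafe.SliceData(b)), seed, uintptr(len(b)))
}
```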

View File

@@ -86,8 +86,8 @@ import (
"internal/abi"
"internal/goarch"
"internal/runtime/atomic"
"runtime/internal/math"
"runtime/internal/sys"
"internal/runtime/math"
"internal/runtime/sys"
"unsafe"
)
@@ -554,13 +554,7 @@ func userArenaHeapBitsSetType(typ *_type, ptr unsafe.Pointer, s *mspan) {
base := s.base()
h := s.writeUserArenaHeapBits(uintptr(ptr))
p := typ.GCData // start of 1-bit pointer mask (or GC program)
var gcProgBits uintptr
if typ.Kind_&abi.KindGCProg != 0 {
// Expand gc program, using the object itself for storage.
gcProgBits = runGCProg(addb(p, 4), (*byte)(ptr))
p = (*byte)(ptr)
}
p := getGCMask(typ) // start of 1-bit pointer mask
nb := typ.PtrBytes / goarch.PtrSize
for i := uintptr(0); i < nb; i += ptrBits {
@@ -585,11 +579,6 @@ func userArenaHeapBitsSetType(typ *_type, ptr unsafe.Pointer, s *mspan) {
h = h.pad(s, typ.Size_-typ.PtrBytes)
h.flush(s, uintptr(ptr), typ.Size_)
if typ.Kind_&abi.KindGCProg != 0 {
// Zero out temporary ptrmask buffer inside object.
memclrNoHeapPointers(ptr, (gcProgBits+7)/8)
}
// Update the PtrBytes value in the type information. After this
// point, the GC will observe the new bitmap.
s.largeType.PtrBytes = uintptr(ptr) - base + typ.PtrBytes
@@ -798,11 +787,8 @@ func newUserArenaChunk() (unsafe.Pointer, *mspan) {
if asanenabled {
// TODO(mknyszek): Track individual objects.
rzSize := computeRZlog(span.elemsize)
span.elemsize -= rzSize
span.largeType.Size_ = span.elemsize
// N.B. span.elemsize includes a redzone already.
rzStart := span.base() + span.elemsize
span.userArenaChunkFree = makeAddrRange(span.base(), rzStart)
asanpoison(unsafe.Pointer(rzStart), span.limit-rzStart)
asanunpoison(unsafe.Pointer(span.base()), span.elemsize)
}
@@ -813,8 +799,8 @@ func newUserArenaChunk() (unsafe.Pointer, *mspan) {
throw("newUserArenaChunk called without a P or outside bootstrapping")
}
// Note cache c only valid while m acquired; see #47302
if rate != 1 && userArenaChunkBytes < c.nextSample {
c.nextSample -= userArenaChunkBytes
if rate != 1 && int64(userArenaChunkBytes) < c.nextSample {
c.nextSample -= int64(userArenaChunkBytes)
} else {
profilealloc(mp, unsafe.Pointer(span.base()), userArenaChunkBytes)
}
@@ -1067,6 +1053,11 @@ func (h *mheap) allocUserArenaChunk() *mspan {
s.freeindex = 1
s.allocCount = 1
// Adjust size to include redzone.
if asanenabled {
s.elemsize -= redZoneSize(s.elemsize)
}
// Account for this new arena chunk memory.
gcController.heapInUse.add(int64(userArenaChunkBytes))
gcController.heapReleased.add(-int64(userArenaChunkBytes))
@@ -1088,7 +1079,7 @@ func (h *mheap) allocUserArenaChunk() *mspan {
// This must clear the entire heap bitmap so that it's safe
// to allocate noscan data without writing anything out.
s.initHeapBits(true)
s.initHeapBits()
// Clear the span preemptively. It's an arena chunk, so let's assume
// everything is going to be used.

View File

@@ -7,19 +7,20 @@
package runtime
import (
"internal/runtime/sys"
"unsafe"
)
// Public address sanitizer API.
func ASanRead(addr unsafe.Pointer, len int) {
sp := getcallersp()
pc := getcallerpc()
sp := sys.GetCallerSP()
pc := sys.GetCallerPC()
doasanread(addr, uintptr(len), sp, pc)
}
func ASanWrite(addr unsafe.Pointer, len int) {
sp := getcallersp()
pc := getcallerpc()
sp := sys.GetCallerSP()
pc := sys.GetCallerPC()
doasanwrite(addr, uintptr(len), sp, pc)
}
@@ -32,16 +33,16 @@ const asanenabled = true
//go:linkname asanread
//go:nosplit
func asanread(addr unsafe.Pointer, sz uintptr) {
sp := getcallersp()
pc := getcallerpc()
sp := sys.GetCallerSP()
pc := sys.GetCallerPC()
doasanread(addr, sz, sp, pc)
}
//go:linkname asanwrite
//go:nosplit
func asanwrite(addr unsafe.Pointer, sz uintptr) {
sp := getcallersp()
pc := getcallerpc()
sp := sys.GetCallerSP()
pc := sys.GetCallerPC()
doasanwrite(addr, sz, sp, pc)
}

View File

@@ -69,6 +69,10 @@ nocgo:
// start this M
JAL runtime·mstart(SB)
// Prevent dead-code elimination of debugCallV2, which is
// intended to be called by debuggers.
MOVV $runtime·debugCallV2<ABIInternal>(SB), R0
MOVV R0, 1(R0)
RET
@@ -87,9 +91,8 @@ TEXT runtime·mstart(SB),NOSPLIT|TOPFRAME,$0
RET // not reached
// func cputicks() int64
TEXT runtime·cputicks(SB),NOSPLIT,$0-8
TEXT runtime·cputicks<ABIInternal>(SB),NOSPLIT,$0-8
RDTIMED R0, R4
MOVV R4, ret+0(FP)
RET
/*
@@ -209,19 +212,19 @@ noswitch:
JMP (R4)
// func switchToCrashStack0(fn func())
TEXT runtime·switchToCrashStack0(SB), NOSPLIT, $0-8
MOVV fn+0(FP), REGCTXT // context register
MOVV g_m(g), R4 // curm
TEXT runtime·switchToCrashStack0<ABIInternal>(SB),NOSPLIT,$0-8
MOVV R4, REGCTXT // context register
MOVV g_m(g), R5 // curm
// set g to gcrash
MOVV $runtime·gcrash(SB), g // g = &gcrash
JAL runtime·save_g(SB)
MOVV R4, g_m(g) // g.m = curm
MOVV g, m_g0(R4) // curm.g0 = g
MOVV R5, g_m(g) // g.m = curm
MOVV g, m_g0(R5) // curm.g0 = g
// switch to crashstack
MOVV (g_stack+stack_hi)(g), R4
ADDV $(-4*8), R4, R3
MOVV (g_stack+stack_hi)(g), R5
ADDV $(-4*8), R5, R3
// call target function
MOVV 0(REGCTXT), R6
@@ -344,32 +347,65 @@ TEXT NAME(SB), WRAPPER, $MAXSIZE-48; \
NO_LOCAL_POINTERS; \
/* copy arguments to stack */ \
MOVV arg+16(FP), R4; \
MOVWU argsize+24(FP), R5; \
MOVV R3, R12; \
MOVWU argsize+24(FP), R5; \
MOVV R3, R12; \
MOVV $16, R13; \
ADDV $8, R12; \
ADDV R12, R5; \
BEQ R12, R5, 6(PC); \
MOVBU (R4), R6; \
ADDV $1, R4; \
MOVBU R6, (R12); \
ADDV $1, R12; \
JMP -5(PC); \
BLT R5, R13, check8; \
/* copy 16 bytes a time */ \
MOVBU internal∕cpu·Loong64+const_offsetLOONG64HasLSX(SB), R16; \
BEQ R16, copy16_again; \
loop16:; \
VMOVQ (R4), V0; \
ADDV $16, R4; \
ADDV $-16, R5; \
VMOVQ V0, (R12); \
ADDV $16, R12; \
BGE R5, R13, loop16; \
JMP check8; \
copy16_again:; \
MOVV (R4), R14; \
MOVV 8(R4), R15; \
ADDV $16, R4; \
ADDV $-16, R5; \
MOVV R14, (R12); \
MOVV R15, 8(R12); \
ADDV $16, R12; \
BGE R5, R13, copy16_again; \
check8:; \
/* R13 = 8 */; \
SRLV $1, R13; \
BLT R5, R13, 6(PC); \
/* copy 8 bytes a time */ \
MOVV (R4), R14; \
ADDV $8, R4; \
ADDV $-8, R5; \
MOVV R14, (R12); \
ADDV $8, R12; \
BEQ R5, R0, 7(PC); \
/* copy 1 byte a time for the rest */ \
MOVBU (R4), R14; \
ADDV $1, R4; \
ADDV $-1, R5; \
MOVBU R14, (R12); \
ADDV $1, R12; \
JMP -6(PC); \
/* set up argument registers */ \
MOVV regArgs+40(FP), R25; \
JAL ·unspillArgs(SB); \
/* call function */ \
MOVV f+8(FP), REGCTXT; \
MOVV f+8(FP), REGCTXT; \
MOVV (REGCTXT), R25; \
PCDATA $PCDATA_StackMapIndex, $0; \
JAL (R25); \
/* copy return values back */ \
MOVV regArgs+40(FP), R25; \
JAL ·spillArgs(SB); \
JAL ·spillArgs(SB); \
MOVV argtype+0(FP), R7; \
MOVV arg+16(FP), R4; \
MOVWU n+24(FP), R5; \
MOVWU retoffset+28(FP), R6; \
ADDV $8, R3, R12; \
ADDV $8, R3, R12; \
ADDV R6, R12; \
ADDV R6, R4; \
SUBVU R6, R5; \
@@ -882,6 +918,229 @@ TEXT runtime·gcWriteBarrier8<ABIInternal>(SB),NOSPLIT,$0
MOVV $64, R29
JMP gcWriteBarrier<>(SB)
DATA debugCallFrameTooLarge<>+0x00(SB)/20, $"call frame too large"
GLOBL debugCallFrameTooLarge<>(SB), RODATA, $20 // Size duplicated below
// debugCallV2 is the entry point for debugger-injected function
// calls on running goroutines. It informs the runtime that a
// debug call has been injected and creates a call frame for the
// debugger to fill in.
//
// To inject a function call, a debugger should:
// 1. Check that the goroutine is in state _Grunning and that
// there are at least 280 bytes free on the stack.
// 2. Set SP as SP-8.
// 3. Store the current LR in (SP) (using the SP after step 2).
// 4. Store the current PC in the LR register.
// 5. Write the desired argument frame size at SP-8
// 6. Save all machine registers so they can be restored later by the debugger.
// 7. Set the PC to debugCallV2 and resume execution.
//
// If the goroutine is in state _Grunnable, then it's not generally
// safe to inject a call because it may return out via other runtime
// operations. Instead, the debugger should unwind the stack to find
// the return to non-runtime code, add a temporary breakpoint there,
// and inject the call once that breakpoint is hit.
//
// If the goroutine is in any other state, it's not safe to inject a call.
//
// This function communicates back to the debugger by setting R19 and
// invoking BREAK to raise a breakpoint signal. Note that the signal PC of
// the signal triggered by the BREAK instruction is the PC where the signal
// is trapped, not the next PC, so to resume execution, the debugger needs
// to set the signal PC to PC+4. See the comments in the implementation for
// the protocol the debugger is expected to follow. InjectDebugCall in the
// runtime tests demonstrates this protocol.
//
// The debugger must ensure that any pointers passed to the function
// obey escape analysis requirements. Specifically, it must not pass
// a stack pointer to an escaping argument. debugCallV2 cannot check
// this invariant.
//
// This is ABIInternal because Go code injects its PC directly into new
// goroutine stacks.
TEXT runtime·debugCallV2<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-0
MOVV R1, -272(R3)
ADDV $-272, R3
// We can't do anything that might clobber any of these
// registers before this.
MOVV R2, (4*8)(R3)
MOVV R4, (5*8)(R3)
MOVV R5, (6*8)(R3)
MOVV R6, (7*8)(R3)
MOVV R7, (8*8)(R3)
MOVV R8, (9*8)(R3)
MOVV R9, (10*8)(R3)
MOVV R10, (11*8)(R3)
MOVV R11, (12*8)(R3)
MOVV R12, (13*8)(R3)
MOVV R13, (14*8)(R3)
MOVV R14, (15*8)(R3)
MOVV R15, (16*8)(R3)
MOVV R16, (17*8)(R3)
MOVV R17, (18*8)(R3)
MOVV R18, (19*8)(R3)
MOVV R19, (20*8)(R3)
MOVV R20, (21*8)(R3)
MOVV R21, (22*8)(R3)
MOVV g, (23*8)(R3)
MOVV R23, (24*8)(R3)
MOVV R24, (25*8)(R3)
MOVV R25, (26*8)(R3)
MOVV R26, (27*8)(R3)
MOVV R27, (28*8)(R3)
MOVV R28, (29*8)(R3)
MOVV R29, (30*8)(R3)
MOVV R30, (31*8)(R3)
MOVV R31, (32*8)(R3)
// Perform a safe-point check.
MOVV R1, 8(R3)
CALL runtime·debugCallCheck(SB)
MOVV 16(R3), R30
BEQ R30, good
// The safety check failed. Put the reason string at the top
// of the stack.
MOVV R30, 8(R3)
MOVV 24(R3), R30
MOVV R30, 16(R3)
MOVV $8, R19
BREAK
JMP restore
good:
// Registers are saved and it's safe to make a call.
// Open up a call frame, moving the stack if necessary.
//
// Once the frame is allocated, this will set R19 to 0 and
// invoke BREAK. The debugger should write the argument
// frame for the call at SP+8, set up argument registers,
// set the LR as the signal PC + 4, set the PC to the function
// to call, set R29 to point to the closure (if a closure call),
// and resume execution.
//
// If the function returns, this will set R19 to 1 and invoke
// BREAK. The debugger can then inspect any return value saved
// on the stack at SP+8 and in registers. To resume execution,
// the debugger should restore the LR from (SP).
//
// If the function panics, this will set R19 to 2 and invoke BREAK.
// The interface{} value of the panic will be at SP+8. The debugger
// can inspect the panic value and resume execution again.
#define DEBUG_CALL_DISPATCH(NAME,MAXSIZE) \
MOVV $MAXSIZE, R27; \
BLT R27, R30, 5(PC); \
MOVV $NAME(SB), R28; \
MOVV R28, 8(R3); \
CALL runtime·debugCallWrap(SB); \
JMP restore
MOVV 264(R3), R30 // the argument frame size
DEBUG_CALL_DISPATCH(debugCall32<>, 32)
DEBUG_CALL_DISPATCH(debugCall64<>, 64)
DEBUG_CALL_DISPATCH(debugCall128<>, 128)
DEBUG_CALL_DISPATCH(debugCall256<>, 256)
DEBUG_CALL_DISPATCH(debugCall512<>, 512)
DEBUG_CALL_DISPATCH(debugCall1024<>, 1024)
DEBUG_CALL_DISPATCH(debugCall2048<>, 2048)
DEBUG_CALL_DISPATCH(debugCall4096<>, 4096)
DEBUG_CALL_DISPATCH(debugCall8192<>, 8192)
DEBUG_CALL_DISPATCH(debugCall16384<>, 16384)
DEBUG_CALL_DISPATCH(debugCall32768<>, 32768)
DEBUG_CALL_DISPATCH(debugCall65536<>, 65536)
// The frame size is too large. Report the error.
MOVV $debugCallFrameTooLarge<>(SB), R30
MOVV R30, 8(R3)
MOVV $20, R30
MOVV R30, 16(R3) // length of debugCallFrameTooLarge string
MOVV $8, R19
BREAK
JMP restore
restore:
// Calls and failures resume here.
//
// Set R19 to 16 and invoke BREAK. The debugger should restore
// all registers except for PC and SP and resume execution.
MOVV $16, R19
BREAK
// We must not modify flags after this point.
// Restore pointer-containing registers, which may have been
// modified from the debugger's copy by stack copying.
MOVV (4*8)(R3), R2
MOVV (5*8)(R3), R4
MOVV (6*8)(R3), R5
MOVV (7*8)(R3), R6
MOVV (8*8)(R3), R7
MOVV (9*8)(R3), R8
MOVV (10*8)(R3), R9
MOVV (11*8)(R3), R10
MOVV (12*8)(R3), R11
MOVV (13*8)(R3), R12
MOVV (14*8)(R3), R13
MOVV (15*8)(R3), R14
MOVV (16*8)(R3), R15
MOVV (17*8)(R3), R16
MOVV (18*8)(R3), R17
MOVV (19*8)(R3), R18
MOVV (20*8)(R3), R19
MOVV (21*8)(R3), R20
MOVV (22*8)(R3), R21
MOVV (23*8)(R3), g
MOVV (24*8)(R3), R23
MOVV (25*8)(R3), R24
MOVV (26*8)(R3), R25
MOVV (27*8)(R3), R26
MOVV (28*8)(R3), R27
MOVV (29*8)(R3), R28
MOVV (30*8)(R3), R29
MOVV (31*8)(R3), R30
MOVV (32*8)(R3), R31
MOVV 0(R3), R30
ADDV $280, R3 // Add 8 more bytes, see saveSigContext
MOVV -8(R3), R1
JMP (R30)
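For reference, the R19 result codes documented in the protocol above can be summarized as a small Go table that a hypothetical debugger-side helper might use; this is a sketch of the documented protocol, not an API the runtime exports:

```go
package debugger

// debugCallStatus mirrors the R19 values documented for debugCallV2 on
// loong64; the debugger reads R19 after each BREAK to decide what to do next.
type debugCallStatus int

const (
	statusFrameReady  debugCallStatus = 0  // write args at SP+8, set LR and PC, resume
	statusReturned    debugCallStatus = 1  // return values are at SP+8 and in registers
	statusPanicked    debugCallStatus = 2  // the panic value (interface{}) is at SP+8
	statusRejected    debugCallStatus = 8  // error string pointer at SP+8, length at SP+16
	statusRestoreRegs debugCallStatus = 16 // restore all registers except PC and SP, then resume
)
```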
// runtime.debugCallCheck assumes that functions defined with the
// DEBUG_CALL_FN macro are safe points to inject calls.
#define DEBUG_CALL_FN(NAME,MAXSIZE) \
TEXT NAME(SB),WRAPPER,$MAXSIZE-0; \
NO_LOCAL_POINTERS; \
MOVV $0, R19; \
BREAK; \
MOVV $1, R19; \
BREAK; \
RET
DEBUG_CALL_FN(debugCall32<>, 32)
DEBUG_CALL_FN(debugCall64<>, 64)
DEBUG_CALL_FN(debugCall128<>, 128)
DEBUG_CALL_FN(debugCall256<>, 256)
DEBUG_CALL_FN(debugCall512<>, 512)
DEBUG_CALL_FN(debugCall1024<>, 1024)
DEBUG_CALL_FN(debugCall2048<>, 2048)
DEBUG_CALL_FN(debugCall4096<>, 4096)
DEBUG_CALL_FN(debugCall8192<>, 8192)
DEBUG_CALL_FN(debugCall16384<>, 16384)
DEBUG_CALL_FN(debugCall32768<>, 32768)
DEBUG_CALL_FN(debugCall65536<>, 65536)
// func debugCallPanicked(val interface{})
TEXT runtime·debugCallPanicked(SB),NOSPLIT,$16-16
// Copy the panic value to the top of stack at SP+8.
MOVV val_type+0(FP), R30
MOVV R30, 8(R3)
MOVV val_data+8(FP), R30
MOVV R30, 16(R3)
MOVV $2, R19
BREAK
RET
// Note: these functions use a special calling convention to save generated code space.
// Arguments are passed in registers, but the space for those arguments is allocated
// in the caller's stack frame. These stubs write the args into that stack space and

src/runtime/asm_riscv64.h (new file)
View File

@@ -0,0 +1,12 @@
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Define features that are guaranteed to be supported by setting the GORISCV64 variable.
// If a feature is supported, there's no need to check it at runtime every time.
#ifdef GORISCV64_rva22u64
#define hasZba
#define hasZbb
#define hasZbs
#endif

View File

@@ -80,12 +80,11 @@ TEXT setg_gcc<>(SB),NOSPLIT,$0-0
RET
// func cputicks() int64
TEXT runtime·cputicks(SB),NOSPLIT,$0-8
TEXT runtime·cputicks<ABIInternal>(SB),NOSPLIT,$0-0
// RDTIME to emulate cpu ticks
// RDCYCLE reads counter that is per HART(core) based
// according to the riscv manual, see issue 46737
RDTIME A0
MOV A0, ret+0(FP)
RDTIME X10
RET
// systemstack_switch is a dummy routine that systemstack leaves at the bottom

View File

@@ -554,5 +554,73 @@ TEXT wasm_pc_f_loop(SB),NOSPLIT,$0
Return
// wasm_pc_f_loop_export is like wasm_pc_f_loop, except that this takes an
// argument (on Wasm stack) that is a PC_F, and the loop stops when we get
// to that PC in a normal return (not unwinding).
// This is for handling a wasmexport function when it needs to switch the
// stack.
TEXT wasm_pc_f_loop_export(SB),NOSPLIT,$0
Get PAUSE
I32Eqz
outer:
If
// R1 is whether a function returned normally (0) or is unwinding (1).
// Start with unwinding.
I32Const $1
Set R1
loop:
Loop
// Get PC_F & PC_B from -8(SP)
Get SP
I32Const $8
I32Sub
I32Load16U $2 // PC_F
Tee R2
Get R0
I32Eq
If // PC_F == R0, we're at the stop PC
Get R1
I32Eqz
// Break if it is a normal return
BrIf outer // actually jump to after the corresponding End
End
Get SP
I32Const $8
I32Sub
I32Load16U $0 // PC_B
Get R2 // PC_F
CallIndirect $0
Set R1 // save return/unwinding state for next iteration
Get PAUSE
I32Eqz
BrIf loop
End
End
I32Const $0
Set PAUSE
Return
TEXT wasm_export_lib(SB),NOSPLIT,$0
UNDEF
TEXT runtime·pause(SB), NOSPLIT, $0-8
MOVD newsp+0(FP), SP
I32Const $1
Set PAUSE
RETUNWIND
// Called if a wasmexport function is called before runtime initialization
TEXT runtime·notInitialized(SB), NOSPLIT, $0
MOVD $runtime·wasmStack+(m0Stack__size-16-8)(SB), SP
I32Const $0 // entry PC_B
Call runtime·notInitialized1(SB)
Drop
I32Const $0 // entry PC_B
Call runtime·abort(SB)
UNDEF
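The wasm_pc_f_loop_export, pause, and notInitialized stubs above exist to support functions exported to the host via the go:wasmexport directive (new in Go 1.24). A hedged sketch of the Go side of such an export, with host-side instantiation and build flags omitted:

```go
//go:build wasip1

package main

// add is exported to the WebAssembly host; the runtime stubs above handle
// re-entering Go (and switching stacks) when the host calls it.
//
//go:wasmexport add
func add(a, b int32) int32 {
	return a + b
}

// main is still required; as the notInitialized stub notes, the runtime
// must be initialized before any exported function is called.
func main() {}
```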

View File

@@ -5,5 +5,5 @@
#include "textflag.h"
TEXT ·publicationBarrier(SB),NOSPLIT|NOFRAME,$0-0
DBAR
DBAR $0x1A // StoreStore barrier
RET

View File

@@ -0,0 +1,49 @@
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package runtime_test
import (
. "runtime"
"testing"
)
func TestBitCursor(t *testing.T) {
ones := [5]byte{0xff, 0xff, 0xff, 0xff, 0xff}
zeros := [5]byte{0, 0, 0, 0, 0}
for start := uintptr(0); start < 16; start++ {
for end := start + 1; end < 32; end++ {
buf := zeros
NewBitCursor(&buf[0]).Offset(start).Write(&ones[0], end-start)
for i := uintptr(0); i < uintptr(len(buf)*8); i++ {
bit := buf[i/8] >> (i % 8) & 1
if bit == 0 && i >= start && i < end {
t.Errorf("bit %d not set in [%d:%d]", i, start, end)
}
if bit == 1 && (i < start || i >= end) {
t.Errorf("bit %d is set outside [%d:%d]", i, start, end)
}
}
}
}
for start := uintptr(0); start < 16; start++ {
for end := start + 1; end < 32; end++ {
buf := ones
NewBitCursor(&buf[0]).Offset(start).Write(&zeros[0], end-start)
for i := uintptr(0); i < uintptr(len(buf)*8); i++ {
bit := buf[i/8] >> (i % 8) & 1
if bit == 1 && i >= start && i < end {
t.Errorf("bit %d not cleared in [%d:%d]", i, start, end)
}
if bit == 0 && (i < start || i >= end) {
t.Errorf("bit %d cleared outside [%d:%d]", i, start, end)
}
}
}
}
}

View File

@@ -5,8 +5,8 @@
package runtime_test
import (
"reflect"
"runtime"
"slices"
"strings"
"testing"
)
@@ -80,7 +80,7 @@ func testCallersEqual(t *testing.T, pcs []uintptr, want []string) {
}
got = append(got, frame.Function)
}
if !reflect.DeepEqual(want, got) {
if !slices.Equal(want, got) {
t.Fatalf("wanted %v, got %v", want, got)
}
}

View File

@@ -72,11 +72,20 @@ var cgoHasExtraM bool
// cgoUse should not actually be called (see cgoAlwaysFalse).
func cgoUse(any) { throw("cgoUse should not be called") }
// cgoKeepAlive is called by cgo-generated code (using go:linkname to get at
// an unexported name). This call keeps its argument alive until the call site;
// cgo emits the call after the last possible use of the argument by C code.
// cgoKeepAlive is marked in the cgo-generated code as //go:noescape, so
// unlike cgoUse it does not force the argument to escape to the heap.
// This is used to implement the #cgo noescape directive.
func cgoKeepAlive(any) { throw("cgoKeepAlive should not be called") }
// cgoAlwaysFalse is a boolean value that is always false.
// The cgo-generated code says if cgoAlwaysFalse { cgoUse(p) }.
// The cgo-generated code says if cgoAlwaysFalse { cgoUse(p) },
// or if cgoAlwaysFalse { cgoKeepAlive(p) }.
// The compiler cannot see that cgoAlwaysFalse is always false,
// so it emits the test and keeps the call, giving the desired
// escape analysis result. The test is cheaper than the call.
// escape/alive analysis result. The test is cheaper than the call.
var cgoAlwaysFalse bool
var cgo_yield = &_cgo_yield
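A minimal self-contained sketch of the pattern these comments describe; the wrapper shape is illustrative rather than the exact cgo-generated output:

```go
package main

// Stand-ins for the runtime declarations described above. At run time
// cgoAlwaysFalse is false, so neither helper is ever called; the compiler,
// unable to prove that, keeps the calls and derives the desired escape or
// liveness result for p.
var cgoAlwaysFalse bool

func cgoUse(any)       { panic("cgoUse should not be called") }
func cgoKeepAlive(any) { panic("cgoKeepAlive should not be called") }

func callIntoC(p *int) {
	// Default: cgo emits the cgoUse guard before the call, forcing p to
	// escape to the heap.
	if cgoAlwaysFalse {
		cgoUse(p)
	}
	// ... the actual C call using p would go here ...

	// With #cgo noescape, cgo instead emits cgoKeepAlive after the last
	// possible use of p by C, keeping it alive without forcing it to escape.
	if cgoAlwaysFalse {
		cgoKeepAlive(p)
	}
}

func main() {
	x := 42
	callIntoC(&x)
}
```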

View File

@@ -32,7 +32,7 @@ package cgo
*/
import "C"
import "runtime/internal/sys"
import "internal/runtime/sys"
// Incomplete is used specifically for the semantics of incomplete C types.
type Incomplete struct {

View File

@@ -34,6 +34,7 @@ _cgo_sys_thread_start(ThreadStart *ts)
size = pthread_get_stacksize_np(pthread_self());
pthread_attr_init(&attr);
pthread_attr_setstacksize(&attr, size);
pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
// Leave stacklo=0 and set stackhi=size; mstart will do the rest.
ts->g->stackhi = size;
err = _cgo_try_pthread_create(&p, &attr, threadentry, ts);

View File

@@ -39,6 +39,7 @@ _cgo_sys_thread_start(ThreadStart *ts)
size = pthread_get_stacksize_np(pthread_self());
pthread_attr_init(&attr);
pthread_attr_setstacksize(&attr, size);
pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
// Leave stacklo=0 and set stackhi=size; mstart will do the rest.
ts->g->stackhi = size;
err = _cgo_try_pthread_create(&p, &attr, threadentry, ts);

View File

@@ -33,6 +33,7 @@ _cgo_sys_thread_start(ThreadStart *ts)
pthread_sigmask(SIG_SETMASK, &ign, &oset);
pthread_attr_init(&attr);
pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
pthread_attr_getstacksize(&attr, &size);
// Leave stacklo=0 and set stackhi=size; mstart will do the rest.

View File

@@ -45,6 +45,7 @@ _cgo_sys_thread_start(ThreadStart *ts)
pthread_sigmask(SIG_SETMASK, &ign, &oset);
pthread_attr_init(&attr);
pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
pthread_attr_getstacksize(&attr, &size);
// Leave stacklo=0 and set stackhi=size; mstart will do the rest.
ts->g->stackhi = size;

View File

@@ -43,6 +43,7 @@ _cgo_sys_thread_start(ThreadStart *ts)
pthread_sigmask(SIG_SETMASK, &ign, &oset);
pthread_attr_init(&attr);
pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
pthread_attr_getstacksize(&attr, &size);
// Leave stacklo=0 and set stackhi=size; mstart will do the rest.
ts->g->stackhi = size;

View File

@@ -37,8 +37,12 @@ static void (*cgo_context_function)(struct context_arg*);
void
x_cgo_sys_thread_create(void* (*func)(void*), void* arg) {
pthread_attr_t attr;
pthread_t p;
int err = _cgo_try_pthread_create(&p, NULL, func, arg);
pthread_attr_init(&attr);
pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
int err = _cgo_try_pthread_create(&p, &attr, func, arg);
if (err != 0) {
fprintf(stderr, "pthread_create failed: %s", strerror(err));
abort();
@@ -153,7 +157,6 @@ _cgo_try_pthread_create(pthread_t* thread, const pthread_attr_t* attr, void* (*p
for (tries = 0; tries < 20; tries++) {
err = pthread_create(thread, attr, pfn, arg);
if (err == 0) {
pthread_detach(*thread);
return 0;
}
if (err != EAGAIN) {

View File

@@ -2,6 +2,12 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
# ifdef __CYGWIN__
#error "don't use the cygwin compiler to build native Windows programs; use MinGW instead"
#else
// Exclude the following code from Cygwin builds.
// Cygwin doesn't implement process.h nor does it support _beginthread.
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#include <process.h>
@@ -156,3 +162,5 @@ void _cgo_beginthread(void (*func)(void*), void* arg) {
fprintf(stderr, "runtime: failed to create new OS thread (%d)\n", errno);
abort();
}
#endif // __CYGWIN__

View File

@@ -40,6 +40,7 @@ _cgo_sys_thread_start(ThreadStart *ts)
pthread_sigmask(SIG_SETMASK, &ign, &oset);
pthread_attr_init(&attr);
pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
pthread_attr_getstacksize(&attr, &size);
// Leave stacklo=0 and set stackhi=size; mstart will do the rest.
ts->g->stackhi = size;

View File

@@ -63,6 +63,7 @@ _cgo_sys_thread_start(ThreadStart *ts)
pthread_sigmask(SIG_SETMASK, &ign, &oset);
pthread_attr_init(&attr);
pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
pthread_attr_getstacksize(&attr, &size);
// Leave stacklo=0 and set stackhi=size; mstart will do the rest.
ts->g->stackhi = size;

View File

@@ -28,6 +28,7 @@ _cgo_sys_thread_start(ThreadStart *ts)
pthread_sigmask(SIG_SETMASK, &ign, &oset);
pthread_attr_init(&attr);
pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
pthread_attr_getstacksize(&attr, &size);
// Leave stacklo=0 and set stackhi=size; mstart will do the rest.
ts->g->stackhi = size;

View File

@@ -33,6 +33,7 @@ _cgo_sys_thread_start(ThreadStart *ts)
pthread_sigmask(SIG_SETMASK, &ign, &oset);
pthread_attr_init(&attr);
pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
pthread_attr_getstacksize(&attr, &size);
// Leave stacklo=0 and set stackhi=size; mstart will do the rest.
ts->g->stackhi = size;

View File

@@ -35,6 +35,7 @@ _cgo_sys_thread_start(ThreadStart *ts)
pthread_sigmask(SIG_SETMASK, &ign, &oset);
pthread_attr_init(&attr);
pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
pthread_attr_getstacksize(&attr, &size);
// Leave stacklo=0 and set stackhi=size; mstart will do the rest.
ts->g->stackhi = size;

View File

@@ -34,6 +34,7 @@ _cgo_sys_thread_start(ThreadStart *ts)
pthread_sigmask(SIG_SETMASK, &ign, &oset);
pthread_attr_init(&attr);
pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
pthread_attr_getstacksize(&attr, &size);
// Leave stacklo=0 and set stackhi=size; mstart will do the rest.

View File

@@ -35,6 +35,7 @@ _cgo_sys_thread_start(ThreadStart *ts)
pthread_sigmask(SIG_SETMASK, &ign, &oset);
pthread_attr_init(&attr);
pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
pthread_attr_getstacksize(&attr, &size);
// Leave stacklo=0 and set stackhi=size; mstart will do the rest.
ts->g->stackhi = size;

View File

@@ -21,7 +21,7 @@ x_cgo_getstackbound(uintptr bounds[2])
// Needed before pthread_getattr_np, too, since before glibc 2.32
// it did not call pthread_attr_init in all cases (see #65625).
pthread_attr_init(&attr);
#if defined(__GLIBC__) || (defined(__sun) && !defined(__illumos__))
#if defined(__GLIBC__) || defined(__BIONIC__) || (defined(__sun) && !defined(__illumos__))
// pthread_getattr_np is a GNU extension supported in glibc.
// Solaris is not glibc but does support pthread_getattr_np
// (and the fallback doesn't work...). Illumos does not.

View File

@@ -88,7 +88,7 @@ import (
"internal/abi"
"internal/goarch"
"internal/goexperiment"
"runtime/internal/sys"
"internal/runtime/sys"
"unsafe"
)
@@ -425,6 +425,13 @@ func cgocallbackg1(fn, frame unsafe.Pointer, ctxt uintptr) {
restore := true
defer unwindm(&restore)
var ditAlreadySet bool
if debug.dataindependenttiming == 1 && gp.m.isextra {
// We only need to enable DIT for threads that were created by C, as it
// should already be enabled on threads that were created by Go.
ditAlreadySet = sys.EnableDIT()
}
if raceenabled {
raceacquire(unsafe.Pointer(&racecgosync))
}
@@ -440,6 +447,11 @@ func cgocallbackg1(fn, frame unsafe.Pointer, ctxt uintptr) {
racereleasemerge(unsafe.Pointer(&racecgosync))
}
if debug.dataindependenttiming == 1 && !ditAlreadySet {
// Only unset DIT if it wasn't already enabled when cgocallback was called.
sys.DisableDIT()
}
// Do not unwind m->g0->sched.sp.
// Our caller, cgocallback, will do that.
restore = false
@@ -558,6 +570,17 @@ func cgoCheckPointer(ptr any, arg any) {
ep = aep
t = ep._type
top = false
case abi.Pointer:
// The Go code is indexing into a pointer to an array,
// and we have been passed the pointer-to-array.
// Check the array rather than the pointer.
pt := (*abi.PtrType)(unsafe.Pointer(aep._type))
t = pt.Elem
if t.Kind_&abi.KindMask != abi.Array {
throw("can't happen")
}
ep = aep
top = false
default:
throw("can't happen")
}

View File

@@ -8,7 +8,6 @@
package runtime
import (
"internal/abi"
"internal/goarch"
"unsafe"
)
@@ -142,52 +141,7 @@ func cgoCheckTypedBlock(typ *_type, src unsafe.Pointer, off, size uintptr) {
size = ptrdataSize
}
if typ.Kind_&abi.KindGCProg == 0 {
cgoCheckBits(src, typ.GCData, off, size)
return
}
// The type has a GC program. Try to find GC bits somewhere else.
for _, datap := range activeModules() {
if cgoInRange(src, datap.data, datap.edata) {
doff := uintptr(src) - datap.data
cgoCheckBits(add(src, -doff), datap.gcdatamask.bytedata, off+doff, size)
return
}
if cgoInRange(src, datap.bss, datap.ebss) {
boff := uintptr(src) - datap.bss
cgoCheckBits(add(src, -boff), datap.gcbssmask.bytedata, off+boff, size)
return
}
}
s := spanOfUnchecked(uintptr(src))
if s.state.get() == mSpanManual {
// There are no heap bits for value stored on the stack.
// For a channel receive src might be on the stack of some
// other goroutine, so we can't unwind the stack even if
// we wanted to.
// We can't expand the GC program without extra storage
// space we can't easily get.
// Fortunately we have the type information.
systemstack(func() {
cgoCheckUsingType(typ, src, off, size)
})
return
}
// src must be in the regular heap.
tp := s.typePointersOf(uintptr(src), size)
for {
var addr uintptr
if tp, addr = tp.next(uintptr(src) + size); addr == 0 {
break
}
v := *(*unsafe.Pointer)(unsafe.Pointer(addr))
if cgoIsGoPointer(v) && !isPinned(v) {
throw(cgoWriteBarrierFail)
}
}
cgoCheckBits(src, getGCMask(typ), off, size)
}
// cgoCheckBits checks the block of memory at src, for up to size
@@ -245,48 +199,5 @@ func cgoCheckUsingType(typ *_type, src unsafe.Pointer, off, size uintptr) {
size = ptrdataSize
}
if typ.Kind_&abi.KindGCProg == 0 {
cgoCheckBits(src, typ.GCData, off, size)
return
}
switch typ.Kind_ & abi.KindMask {
default:
throw("can't happen")
case abi.Array:
at := (*arraytype)(unsafe.Pointer(typ))
for i := uintptr(0); i < at.Len; i++ {
if off < at.Elem.Size_ {
cgoCheckUsingType(at.Elem, src, off, size)
}
src = add(src, at.Elem.Size_)
skipped := off
if skipped > at.Elem.Size_ {
skipped = at.Elem.Size_
}
checked := at.Elem.Size_ - skipped
off -= skipped
if size <= checked {
return
}
size -= checked
}
case abi.Struct:
st := (*structtype)(unsafe.Pointer(typ))
for _, f := range st.Fields {
if off < f.Typ.Size_ {
cgoCheckUsingType(f.Typ, src, off, size)
}
src = add(src, f.Typ.Size_)
skipped := off
if skipped > f.Typ.Size_ {
skipped = f.Typ.Size_
}
checked := f.Typ.Size_ - skipped
off -= skipped
if size <= checked {
return
}
size -= checked
}
}
cgoCheckBits(src, getGCMask(typ), off, size)
}

View File

@@ -20,7 +20,8 @@ package runtime
import (
"internal/abi"
"internal/runtime/atomic"
"runtime/internal/math"
"internal/runtime/math"
"internal/runtime/sys"
"unsafe"
)
@@ -35,6 +36,7 @@ type hchan struct {
dataqsiz uint // size of the circular queue
buf unsafe.Pointer // points to an array of dataqsiz elements
elemsize uint16
synctest bool // true if created in a synctest bubble
closed uint32
timer *timer // timer feeding this chan
elemtype *_type // element type
@@ -111,6 +113,9 @@ func makechan(t *chantype, size int) *hchan {
c.elemsize = uint16(elem.Size_)
c.elemtype = elem
c.dataqsiz = uint(size)
if getg().syncGroup != nil {
c.synctest = true
}
lockInit(&c.lock, lockRankHchan)
if debugChan {
@@ -153,7 +158,7 @@ func full(c *hchan) bool {
//
//go:nosplit
func chansend1(c *hchan, elem unsafe.Pointer) {
chansend(c, elem, true, getcallerpc())
chansend(c, elem, true, sys.GetCallerPC())
}
/*
@@ -185,6 +190,10 @@ func chansend(c *hchan, ep unsafe.Pointer, block bool, callerpc uintptr) bool {
racereadpc(c.raceaddr(), callerpc, abi.FuncPCABIInternal(chansend))
}
if c.synctest && getg().syncGroup == nil {
panic(plainError("send on synctest channel from outside bubble"))
}
// Fast path: check for failed non-blocking operation without acquiring the lock.
//
// After observing that the channel is not closed, we observe that the channel is
@@ -267,7 +276,11 @@ func chansend(c *hchan, ep unsafe.Pointer, block bool, callerpc uintptr) bool {
// changes and when we set gp.activeStackChans is not safe for
// stack shrinking.
gp.parkingOnChan.Store(true)
gopark(chanparkcommit, unsafe.Pointer(&c.lock), waitReasonChanSend, traceBlockChanSend, 2)
reason := waitReasonChanSend
if c.synctest {
reason = waitReasonSynctestChanSend
}
gopark(chanparkcommit, unsafe.Pointer(&c.lock), reason, traceBlockChanSend, 2)
// Ensure the value being sent is kept alive until the
// receiver copies it out. The sudog has a pointer to the
// stack object, but sudogs aren't considered as roots of the
@@ -303,6 +316,10 @@ func chansend(c *hchan, ep unsafe.Pointer, block bool, callerpc uintptr) bool {
// sg must already be dequeued from c.
// ep must be non-nil and point to the heap or the caller's stack.
func send(c *hchan, sg *sudog, ep unsafe.Pointer, unlockf func(), skip int) {
if c.synctest && sg.g.syncGroup != getg().syncGroup {
unlockf()
panic(plainError("send on synctest channel from outside bubble"))
}
if raceenabled {
if c.dataqsiz == 0 {
racesync(c, sg)
@@ -406,7 +423,7 @@ func closechan(c *hchan) {
}
if raceenabled {
callerpc := getcallerpc()
callerpc := sys.GetCallerPC()
racewritepc(c.raceaddr(), callerpc, abi.FuncPCABIInternal(closechan))
racerelease(c.raceaddr())
}
@@ -517,6 +534,10 @@ func chanrecv(c *hchan, ep unsafe.Pointer, block bool) (selected, received bool)
throw("unreachable")
}
if c.synctest && getg().syncGroup == nil {
panic(plainError("receive on synctest channel from outside bubble"))
}
if c.timer != nil {
c.timer.maybeRunChan()
}
@@ -636,7 +657,11 @@ func chanrecv(c *hchan, ep unsafe.Pointer, block bool) (selected, received bool)
// changes and when we set gp.activeStackChans is not safe for
// stack shrinking.
gp.parkingOnChan.Store(true)
gopark(chanparkcommit, unsafe.Pointer(&c.lock), waitReasonChanReceive, traceBlockChanRecv, 2)
reason := waitReasonChanReceive
if c.synctest {
reason = waitReasonSynctestChanReceive
}
gopark(chanparkcommit, unsafe.Pointer(&c.lock), reason, traceBlockChanRecv, 2)
// someone woke us up
if mysg != gp.waiting {
@@ -672,6 +697,10 @@ func chanrecv(c *hchan, ep unsafe.Pointer, block bool) (selected, received bool)
// sg must already be dequeued from c.
// A non-nil ep must point to the heap or the caller's stack.
func recv(c *hchan, sg *sudog, ep unsafe.Pointer, unlockf func(), skip int) {
if c.synctest && sg.g.syncGroup != getg().syncGroup {
unlockf()
panic(plainError("receive on synctest channel from outside bubble"))
}
if c.dataqsiz == 0 {
if raceenabled {
racesync(c, sg)
@@ -750,7 +779,7 @@ func chanparkcommit(gp *g, chanLock unsafe.Pointer) bool {
// ... bar
// }
func selectnbsend(c *hchan, elem unsafe.Pointer) (selected bool) {
return chansend(c, elem, false, getcallerpc())
return chansend(c, elem, false, sys.GetCallerPC())
}
// compiler implements
@@ -775,7 +804,7 @@ func selectnbrecv(elem unsafe.Pointer, c *hchan) (selected, received bool) {
//go:linkname reflect_chansend reflect.chansend0
func reflect_chansend(c *hchan, elem unsafe.Pointer, nb bool) (selected bool) {
return chansend(c, elem, !nb, getcallerpc())
return chansend(c, elem, !nb, sys.GetCallerPC())
}
//go:linkname reflect_chanrecv reflect.chanrecv
@@ -875,8 +904,11 @@ func (q *waitq) dequeue() *sudog {
// We use a flag in the G struct to tell us when someone
// else has won the race to signal this goroutine but the goroutine
// hasn't removed itself from the queue yet.
if sgp.isSelect && !sgp.g.selectDone.CompareAndSwap(0, 1) {
continue
if sgp.isSelect {
if !sgp.g.selectDone.CompareAndSwap(0, 1) {
// We lost the race to wake this goroutine.
continue
}
}
return sgp
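The synctest hunks above mark a channel created inside a synctest bubble and panic when it is sent to or received from outside that bubble. A hedged sketch of how this surfaces through the experimental testing/synctest package (Go 1.24, behind GOEXPERIMENT=synctest; the API is experimental and may change):

```go
//go:build goexperiment.synctest

package synctestdemo

import (
	"testing"
	"testing/synctest"
)

func TestBubbleChannel(t *testing.T) {
	var ch chan int
	synctest.Run(func() {
		ch = make(chan int, 1) // created in the bubble: c.synctest is set
		ch <- 1                // fine: sender is in the same bubble
		<-ch
	})
	// The channel keeps its synctest mark after the bubble exits, so using
	// it from a goroutine with no sync group panics with
	// "send on synctest channel from outside bubble".
	defer func() {
		if r := recover(); r == nil {
			t.Fatal("expected panic on send from outside the bubble")
		}
	}()
	ch <- 2
}
```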

View File

@@ -4,7 +4,10 @@
package runtime
import "unsafe"
import (
"internal/runtime/sys"
"unsafe"
)
// A coro represents extra concurrency without extra parallelism,
// as would be needed for a coroutine implementation.
@@ -39,7 +42,7 @@ type coro struct {
func newcoro(f func(*coro)) *coro {
c := new(coro)
c.f = f
pc := getcallerpc()
pc := sys.GetCallerPC()
gp := getg()
systemstack(func() {
mp := gp.m
@@ -134,6 +137,16 @@ func coroswitch_m(gp *g) {
// emitting an event for every single transition.
trace := traceAcquire()
canCAS := true
sg := gp.syncGroup
if sg != nil {
// If we're in a synctest group, always use casgstatus (which tracks
// group idleness) rather than directly CASing. Mark the group as active
// while we're in the process of transferring control.
canCAS = false
sg.incActive()
}
if locked {
// Detach the goroutine from the thread; we'll attach to the goroutine we're
// switching to before returning.
@@ -152,7 +165,7 @@ func coroswitch_m(gp *g) {
// If we can CAS ourselves directly from running to waiting, so do,
// keeping the control transfer as lightweight as possible.
gp.waitreason = waitReasonCoroutine
if !gp.atomicstatus.CompareAndSwap(_Grunning, _Gwaiting) {
if !canCAS || !gp.atomicstatus.CompareAndSwap(_Grunning, _Gwaiting) {
// The CAS failed: use casgstatus, which will take care of
// coordinating with the garbage collector about the state change.
casgstatus(gp, _Grunning, _Gwaiting)
@@ -220,7 +233,7 @@ func coroswitch_m(gp *g) {
tryRecordGoroutineProfile(gnext, nil, osyield)
}
if !gnext.atomicstatus.CompareAndSwap(_Gwaiting, _Grunning) {
if !canCAS || !gnext.atomicstatus.CompareAndSwap(_Gwaiting, _Grunning) {
// The CAS failed: use casgstatus, which will take care of
// coordinating with the garbage collector about the state change.
casgstatus(gnext, _Gwaiting, _Grunnable)
@@ -238,6 +251,10 @@ func coroswitch_m(gp *g) {
traceRelease(trace)
}
if sg != nil {
sg.decActive()
}
// Switch to gnext. Does not return.
gogo(&gnext.sched)
}

View File

@@ -2,6 +2,8 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package coverage contains APIs for writing coverage profile data at runtime
// from long-running and/or server programs that do not terminate via [os.Exit].
package coverage
import (

View File

@@ -19,6 +19,8 @@ const (
offsetARMHasIDIVA = unsafe.Offsetof(cpu.ARM.HasIDIVA)
offsetMIPS64XHasMSA = unsafe.Offsetof(cpu.MIPS64X.HasMSA)
offsetLOONG64HasLSX = unsafe.Offsetof(cpu.Loong64.HasLSX)
)
var (
@@ -31,4 +33,8 @@ var (
armHasVFPv4 bool
arm64HasATOMICS bool
loong64HasLAMCAS bool
loong64HasLAM_BH bool
loong64HasLSX bool
)

View File

@@ -8,17 +8,31 @@ import (
"internal/cpu"
)
var useAVXmemmove bool
var memmoveBits uint8
const (
// avxSupported indicates that the CPU supports AVX instructions.
avxSupported = 1 << 0
// repmovsPreferred indicates that REP MOVSx instruction is more
// efficient on the CPU.
repmovsPreferred = 1 << 1
)
func init() {
// Let's remove stepping and reserved fields
processor := processorVersionInfo & 0x0FFF3FF0
isIntelBridgeFamily := isIntel &&
processor == 0x206A0 ||
processor == 0x206D0 ||
processor == 0x306A0 ||
processor == 0x306E0
useAVXmemmove = cpu.X86.HasAVX && !isIntelBridgeFamily
// Here we assume that on modern CPUs with both the FSRM and ERMS features,
// copying data blocks of 2KB or larger using the REP MOVSB instruction is
// more efficient, which avoids having to keep up with individual CPU
// generations. A block-list mechanism may still be needed if
// microarchitectures that do not fit this assumption appear in the future.
// We enable it on Intel CPUs first, and we may support more platforms
// in the future.
isERMSNiceCPU := isIntel
useREPMOV := isERMSNiceCPU && cpu.X86.HasERMS && cpu.X86.HasFSRM
if cpu.X86.HasAVX {
memmoveBits |= avxSupported
}
if useREPMOV {
memmoveBits |= repmovsPreferred
}
}
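A hypothetical Go-level illustration of how a copy routine could consult the feature bits set up above; in the runtime the dispatch happens in the amd64 assembly memmove, and the ordering here is illustrative, with only the 2KB threshold taken from the comment:

```go
package main

import "fmt"

const (
	avxSupported     = 1 << 0 // CPU supports AVX copies
	repmovsPreferred = 1 << 1 // REP MOVSB is preferred for large blocks
)

// chooseCopyStrategy sketches, at a high level, the kind of decision the
// assembly memmove makes from memmoveBits for a copy of n bytes.
func chooseCopyStrategy(memmoveBits uint8, n int) string {
	switch {
	case memmoveBits&repmovsPreferred != 0 && n >= 2048:
		return "REP MOVSB" // ERMS+FSRM CPUs: efficient for blocks of 2KB or more
	case memmoveBits&avxSupported != 0:
		return "AVX loop"
	default:
		return "SSE/scalar loop"
	}
}

func main() {
	fmt.Println(chooseCopyStrategy(avxSupported|repmovsPreferred, 4096))
}
```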

View File

@@ -14,7 +14,7 @@ package runtime
import (
"internal/abi"
"runtime/internal/sys"
"internal/runtime/sys"
"unsafe"
)

View File

@@ -65,10 +65,6 @@ func TestCgoCallbackGC(t *testing.T) {
t.Skip("too slow for mips64x builders")
}
}
if testenv.Builder() == "darwin-amd64-10_14" {
// TODO(#23011): When the 10.14 builders are gone, remove this skip.
t.Skip("skipping due to platform bug on macOS 10.14; see https://golang.org/issue/43926")
}
got := runTestProg(t, "testprogcgo", "CgoCallbackGC")
want := "OK\n"
if got != want {
@@ -754,7 +750,6 @@ func TestNeedmDeadlock(t *testing.T) {
}
func TestCgoNoCallback(t *testing.T) {
t.Skip("TODO(#56378): enable in Go 1.23")
got := runTestProg(t, "testprogcgo", "CgoNoCallback")
want := "function marked with #cgo nocallback called back into Go"
if !strings.Contains(got, want) {
@@ -763,7 +758,6 @@ func TestCgoNoCallback(t *testing.T) {
}
func TestCgoNoEscape(t *testing.T) {
t.Skip("TODO(#56378): enable in Go 1.23")
got := runTestProg(t, "testprogcgo", "CgoNoEscape")
want := "OK\n"
if got != want {
@@ -771,6 +765,15 @@ func TestCgoNoEscape(t *testing.T) {
}
}
// Issue #63739.
func TestCgoEscapeWithMultiplePointers(t *testing.T) {
got := runTestProg(t, "testprogcgo", "CgoEscapeWithMultiplePointers")
want := "OK\n"
if got != want {
t.Fatalf("output is %s; want %s", got, want)
}
}
func TestCgoTracebackGoroutineProfile(t *testing.T) {
output := runTestProg(t, "testprogcgo", "GoroutineProfile")
want := "OK\n"
@@ -856,3 +859,13 @@ func TestStackSwitchCallback(t *testing.T) {
t.Errorf("expected %q, got %v", want, got)
}
}
func TestCgoToGoCallGoexit(t *testing.T) {
if runtime.GOOS == "plan9" || runtime.GOOS == "windows" {
t.Skipf("no pthreads on %s", runtime.GOOS)
}
output := runTestProg(t, "testprogcgo", "CgoToGoCallGoexit")
if !strings.Contains(output, "runtime.Goexit called in a thread that was not created by the Go runtime") {
t.Fatalf("output should contain %s, got %s", "runtime.Goexit called in a thread that was not created by the Go runtime", output)
}
}

View File

@@ -32,8 +32,11 @@ const entrypointVar = "RUNTIME_TEST_ENTRYPOINT"
func TestMain(m *testing.M) {
switch entrypoint := os.Getenv(entrypointVar); entrypoint {
case "crash":
crash()
case "panic":
crashViaPanic()
panic("unreachable")
case "trap":
crashViaTrap()
panic("unreachable")
default:
log.Fatalf("invalid %s: %q", entrypointVar, entrypoint)
@@ -621,8 +624,11 @@ func TestConcurrentMapWrites(t *testing.T) {
}
testenv.MustHaveGoRun(t)
output := runTestProg(t, "testprog", "concurrentMapWrites")
want := "fatal error: concurrent map writes"
if !strings.HasPrefix(output, want) {
want := "fatal error: concurrent map writes\n"
// Concurrent writes can corrupt the map in a way that we
// detect with a separate throw.
want2 := "fatal error: small map with no empty slot (concurrent map writes?)\n"
if !strings.HasPrefix(output, want) && !strings.HasPrefix(output, want2) {
t.Fatalf("output does not start with %q:\n%s", want, output)
}
}
@@ -632,8 +638,11 @@ func TestConcurrentMapReadWrite(t *testing.T) {
}
testenv.MustHaveGoRun(t)
output := runTestProg(t, "testprog", "concurrentMapReadWrite")
want := "fatal error: concurrent map read and map write"
if !strings.HasPrefix(output, want) {
want := "fatal error: concurrent map read and map write\n"
// Concurrent writes can corrupt the map in a way that we
// detect with a separate throw.
want2 := "fatal error: small map with no empty slot (concurrent map writes?)\n"
if !strings.HasPrefix(output, want) && !strings.HasPrefix(output, want2) {
t.Fatalf("output does not start with %q:\n%s", want, output)
}
}
@@ -643,12 +652,42 @@ func TestConcurrentMapIterateWrite(t *testing.T) {
}
testenv.MustHaveGoRun(t)
output := runTestProg(t, "testprog", "concurrentMapIterateWrite")
want := "fatal error: concurrent map iteration and map write"
if !strings.HasPrefix(output, want) {
want := "fatal error: concurrent map iteration and map write\n"
// Concurrent writes can corrupt the map in a way that we
// detect with a separate throw.
want2 := "fatal error: small map with no empty slot (concurrent map writes?)\n"
if !strings.HasPrefix(output, want) && !strings.HasPrefix(output, want2) {
t.Fatalf("output does not start with %q:\n%s", want, output)
}
}
func TestConcurrentMapWritesIssue69447(t *testing.T) {
testenv.MustHaveGoRun(t)
exe, err := buildTestProg(t, "testprog")
if err != nil {
t.Fatal(err)
}
for i := 0; i < 200; i++ {
output := runBuiltTestProg(t, exe, "concurrentMapWrites")
if output == "" {
// If we didn't detect an error, that's ok.
// This case makes this test not flaky like
// the other ones above.
// (More correctly, this case makes this test flaky
// in the other direction, in that it might not
// detect a problem even if there is one.)
continue
}
want := "fatal error: concurrent map writes\n"
// Concurrent writes can corrupt the map in a way that we
// detect with a separate throw.
want2 := "fatal error: small map with no empty slot (concurrent map writes?)\n"
if !strings.HasPrefix(output, want) && !strings.HasPrefix(output, want2) {
t.Fatalf("output does not start with %q:\n%s", want, output)
}
}
}
type point struct {
x, y *int
}

View File

@@ -101,6 +101,7 @@ func quoteValue(value string) bool {
return strings.ContainsAny(value, " \t\r\n\"`")
}
// String returns a string representation of a [BuildInfo].
func (bi *BuildInfo) String() string {
buf := new(strings.Builder)
if bi.GoVersion != "" {
@@ -146,6 +147,12 @@ func (bi *BuildInfo) String() string {
return buf.String()
}
// ParseBuildInfo parses the string returned by [*BuildInfo.String],
// restoring the original BuildInfo,
// except that the GoVersion field is not set.
// Programs should normally not call this function,
// but instead call [ReadBuildInfo], [debug/buildinfo.ReadFile],
// or [debug/buildinfo.Read].
func ParseBuildInfo(data string) (bi *BuildInfo, err error) {
lineNum := 1
defer func() {
@@ -154,7 +161,7 @@ func ParseBuildInfo(data string) (bi *BuildInfo, err error) {
}
}()
var (
const (
pathLine = "path\t"
modLine = "mod\t"
depLine = "dep\t"
@@ -195,7 +202,7 @@ func ParseBuildInfo(data string) (bi *BuildInfo, err error) {
switch {
case strings.HasPrefix(line, pathLine):
elem := line[len(pathLine):]
bi.Path = string(elem)
bi.Path = elem
case strings.HasPrefix(line, modLine):
elem := strings.Split(line[len(modLine):], tab)
last = &bi.Main
@@ -220,9 +227,9 @@ func ParseBuildInfo(data string) (bi *BuildInfo, err error) {
return nil, fmt.Errorf("replacement with no module on previous line")
}
last.Replace = &Module{
Path: string(elem[0]),
Version: string(elem[1]),
Sum: string(elem[2]),
Path: elem[0],
Version: elem[1],
Sum: elem[2],
}
last = nil
case strings.HasPrefix(line, buildLine):
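As the new doc comment says, programs normally round-trip build info through ReadBuildInfo and String rather than calling ParseBuildInfo directly; a small usage sketch:

```go
package main

import (
	"fmt"
	"runtime/debug"
)

func main() {
	bi, ok := debug.ReadBuildInfo()
	if !ok {
		fmt.Println("binary was built without module support")
		return
	}
	// String renders the build info; ParseBuildInfo restores it, except
	// that GoVersion is not set on the parsed copy.
	text := bi.String()
	parsed, err := debug.ParseBuildInfo(text)
	if err != nil {
		fmt.Println("parse error:", err)
		return
	}
	fmt.Println(parsed.Path, parsed.Main.Version)
}
```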

View File

@@ -52,7 +52,7 @@ func SetCrashOutput(f *os.File, opts CrashOptions) error {
// The runtime will write to this file descriptor from
// low-level routines during a panic, possibly without
// a G, so we must call f.Fd() eagerly. This creates a
// danger that that the file descriptor is no longer
// danger that the file descriptor is no longer
// valid at the time of the write, because the caller
// (incorrectly) called f.Close() and the kernel
// reissued the fd in a later call to open(2), leading

View File

@@ -9,13 +9,15 @@
// spends all of its time in the race runtime, which isn't a safe
// point.
//go:build (amd64 || arm64 || ppc64le) && linux && !race
//go:build (amd64 || arm64 || loong64 || ppc64le) && linux && !race
package runtime_test
import (
"fmt"
"internal/abi"
"internal/asan"
"internal/msan"
"math"
"os"
"regexp"
@@ -32,6 +34,14 @@ func startDebugCallWorker(t *testing.T) (g *runtime.G, after func()) {
// a debugger.
skipUnderDebugger(t)
// asan/msan instrumentation interferes with tests since we might
// inject debugCallV2 while in the asan/msan runtime. This is a
// problem for doing things like running the GC or taking stack
// traces. Not sure why this is happening yet, but skip for now.
if msan.Enabled || asan.Enabled {
t.Skip("debugCallV2 is injected erroneously during asan/msan runtime calls; skipping")
}
// This can deadlock if there aren't enough threads or if a GC
// tries to interrupt an atomic loop (see issue #10958). Execute
// an extra GC to ensure even the sweep phase is done (out of

View File

@@ -5,12 +5,13 @@
// Though the debug call function feature is not enabled on
// ppc64, inserted ppc64 to avoid missing Go declaration error
// for debugCallPanicked while building runtime.test
//go:build amd64 || arm64 || ppc64le || ppc64
//go:build amd64 || arm64 || loong64 || ppc64le || ppc64
package runtime
import (
"internal/abi"
"internal/runtime/sys"
"unsafe"
)
@@ -34,7 +35,7 @@ func debugCallCheck(pc uintptr) string {
if getg() != getg().m.curg {
return debugCallSystemStack
}
if sp := getcallersp(); !(getg().stack.lo < sp && sp <= getg().stack.hi) {
if sp := sys.GetCallerSP(); !(getg().stack.lo < sp && sp <= getg().stack.hi) {
// Fast syscalls (nanotime) and racecall switch to the
// g0 stack without switching g. We can't safely make
// a call in this state. (We can't even safely
@@ -106,7 +107,7 @@ func debugCallCheck(pc uintptr) string {
//go:nosplit
func debugCallWrap(dispatch uintptr) {
var lockedExt uint32
callerpc := getcallerpc()
callerpc := sys.GetCallerPC()
gp := getg()
// Lock ourselves to the OS thread.

View File

@@ -12,13 +12,23 @@
//
// This facility can be enabled by passing -tags debuglog when
// building. Without this tag, dlog calls compile to nothing.
//
// Implementation notes
//
// There are two implementations of the dlog interface: dloggerImpl and
// dloggerFake. dloggerFake is a no-op implementation. dlogger is type-aliased
// to one or the other depending on the debuglog build tag. However, both types
// always exist and are always built. This helps ensure we compile as much of
// the implementation as possible in the default build configuration, while also
// enabling us to achieve good test coverage of the real debuglog implementation
// even when the debuglog build tag is not set.
package runtime
import (
"internal/abi"
"internal/runtime/atomic"
"runtime/internal/sys"
"internal/runtime/sys"
"unsafe"
)
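A standalone sketch of the build-tag-selected no-op logger pattern described in the implementation notes above (hypothetical names; the real runtime selects dloggerFake or *dloggerImpl via the debuglog tag):

```go
package fakelog

// loggerImpl is the real logger; its methods do work and return the
// receiver so calls can be chained.
type loggerImpl struct{ buf []byte }

func (l *loggerImpl) i(x int) *loggerImpl { l.buf = append(l.buf, byte(x)); return l }
func (l *loggerImpl) end()                { l.buf = l.buf[:0] }

// loggerFake has the same method set but does nothing. Because it is an
// empty value type, chained calls compile away entirely, yet both
// implementations are always built and type-checked.
type loggerFake struct{}

func (l loggerFake) i(x int) loggerFake { return l }
func (l loggerFake) end()               {}

// With a build tag, one alias or the other is chosen, so callers always
// write log().i(1).i(2).end() regardless of whether logging is enabled:
//
//	//go:build mylog
//	type logger = *loggerImpl
//
//	//go:build !mylog
//	type logger = loggerFake
```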
@@ -48,11 +58,20 @@ const debugLogStringLimit = debugLogBytes / 8
//
//go:nosplit
//go:nowritebarrierrec
func dlog() *dlogger {
if !dlogEnabled {
return nil
}
func dlog() dlogger {
// dlog1 is defined to either dlogImpl or dlogFake.
return dlog1()
}
//go:nosplit
//go:nowritebarrierrec
func dlogFake() dloggerFake {
return dloggerFake{}
}
//go:nosplit
//go:nowritebarrierrec
func dlogImpl() *dloggerImpl {
// Get the time.
tick, nano := uint64(cputicks()), uint64(nanotime())
@@ -63,7 +82,7 @@ func dlog() *dlogger {
// global pool.
if l == nil {
allp := (*uintptr)(unsafe.Pointer(&allDloggers))
all := (*dlogger)(unsafe.Pointer(atomic.Loaduintptr(allp)))
all := (*dloggerImpl)(unsafe.Pointer(atomic.Loaduintptr(allp)))
for l1 := all; l1 != nil; l1 = l1.allLink {
if l1.owned.Load() == 0 && l1.owned.CompareAndSwap(0, 1) {
l = l1
@@ -76,7 +95,7 @@ func dlog() *dlogger {
if l == nil {
// Use sysAllocOS instead of sysAlloc because we want to interfere
// with the runtime as little as possible, and sysAlloc updates accounting.
l = (*dlogger)(sysAllocOS(unsafe.Sizeof(dlogger{})))
l = (*dloggerImpl)(sysAllocOS(unsafe.Sizeof(dloggerImpl{})))
if l == nil {
throw("failed to allocate debug log")
}
@@ -87,7 +106,7 @@ func dlog() *dlogger {
headp := (*uintptr)(unsafe.Pointer(&allDloggers))
for {
head := atomic.Loaduintptr(headp)
l.allLink = (*dlogger)(unsafe.Pointer(head))
l.allLink = (*dloggerImpl)(unsafe.Pointer(head))
if atomic.Casuintptr(headp, head, uintptr(unsafe.Pointer(l))) {
break
}
@@ -119,16 +138,16 @@ func dlog() *dlogger {
return l
}
// A dlogger writes to the debug log.
// A dloggerImpl writes to the debug log.
//
// To obtain a dlogger, call dlog(). When done with the dlogger, call
// To obtain a dloggerImpl, call dlog(). When done with the dloggerImpl, call
// end().
type dlogger struct {
type dloggerImpl struct {
_ sys.NotInHeap
w debugLogWriter
// allLink is the next dlogger in the allDloggers list.
allLink *dlogger
allLink *dloggerImpl
// owned indicates that this dlogger is owned by an M. This is
// accessed atomically.
@@ -138,14 +157,16 @@ type dlogger struct {
// allDloggers is a list of all dloggers, linked through
// dlogger.allLink. This is accessed atomically. This is prepend only,
// so it doesn't need to protect against ABA races.
var allDloggers *dlogger
var allDloggers *dloggerImpl
// A dloggerFake is a no-op implementation of dlogger.
type dloggerFake struct{}
//go:nosplit
func (l *dlogger) end() {
if !dlogEnabled {
return
}
func (l dloggerFake) end() {}
//go:nosplit
func (l *dloggerImpl) end() {
// Fill in framing header.
size := l.w.write - l.w.r.end
if !l.w.writeFrameAt(l.w.r.end, size) {
@@ -181,10 +202,10 @@ const (
)
//go:nosplit
func (l *dlogger) b(x bool) *dlogger {
if !dlogEnabled {
return l
}
func (l dloggerFake) b(x bool) dloggerFake { return l }
//go:nosplit
func (l *dloggerImpl) b(x bool) *dloggerImpl {
if x {
l.w.byte(debugLogBoolTrue)
} else {
@@ -194,85 +215,112 @@ func (l *dlogger) b(x bool) *dlogger {
}
//go:nosplit
func (l *dlogger) i(x int) *dlogger {
func (l dloggerFake) i(x int) dloggerFake { return l }
//go:nosplit
func (l *dloggerImpl) i(x int) *dloggerImpl {
return l.i64(int64(x))
}
//go:nosplit
func (l *dlogger) i8(x int8) *dlogger {
func (l dloggerFake) i8(x int8) dloggerFake { return l }
//go:nosplit
func (l *dloggerImpl) i8(x int8) *dloggerImpl {
return l.i64(int64(x))
}
//go:nosplit
func (l *dlogger) i16(x int16) *dlogger {
func (l dloggerFake) i16(x int16) dloggerFake { return l }
//go:nosplit
func (l *dloggerImpl) i16(x int16) *dloggerImpl {
return l.i64(int64(x))
}
//go:nosplit
func (l *dlogger) i32(x int32) *dlogger {
func (l dloggerFake) i32(x int32) dloggerFake { return l }
//go:nosplit
func (l *dloggerImpl) i32(x int32) *dloggerImpl {
return l.i64(int64(x))
}
//go:nosplit
func (l *dlogger) i64(x int64) *dlogger {
if !dlogEnabled {
return l
}
func (l dloggerFake) i64(x int64) dloggerFake { return l }
//go:nosplit
func (l *dloggerImpl) i64(x int64) *dloggerImpl {
l.w.byte(debugLogInt)
l.w.varint(x)
return l
}
//go:nosplit
func (l *dlogger) u(x uint) *dlogger {
func (l dloggerFake) u(x uint) dloggerFake { return l }
//go:nosplit
func (l *dloggerImpl) u(x uint) *dloggerImpl {
return l.u64(uint64(x))
}
//go:nosplit
func (l *dlogger) uptr(x uintptr) *dlogger {
func (l dloggerFake) uptr(x uintptr) dloggerFake { return l }
//go:nosplit
func (l *dloggerImpl) uptr(x uintptr) *dloggerImpl {
return l.u64(uint64(x))
}
//go:nosplit
func (l *dlogger) u8(x uint8) *dlogger {
func (l dloggerFake) u8(x uint8) dloggerFake { return l }
//go:nosplit
func (l *dloggerImpl) u8(x uint8) *dloggerImpl {
return l.u64(uint64(x))
}
//go:nosplit
func (l *dlogger) u16(x uint16) *dlogger {
func (l dloggerFake) u16(x uint16) dloggerFake { return l }
//go:nosplit
func (l *dloggerImpl) u16(x uint16) *dloggerImpl {
return l.u64(uint64(x))
}
//go:nosplit
func (l *dlogger) u32(x uint32) *dlogger {
func (l dloggerFake) u32(x uint32) dloggerFake { return l }
//go:nosplit
func (l *dloggerImpl) u32(x uint32) *dloggerImpl {
return l.u64(uint64(x))
}
//go:nosplit
func (l *dlogger) u64(x uint64) *dlogger {
if !dlogEnabled {
return l
}
func (l dloggerFake) u64(x uint64) dloggerFake { return l }
//go:nosplit
func (l *dloggerImpl) u64(x uint64) *dloggerImpl {
l.w.byte(debugLogUint)
l.w.uvarint(x)
return l
}
//go:nosplit
func (l *dlogger) hex(x uint64) *dlogger {
if !dlogEnabled {
return l
}
func (l dloggerFake) hex(x uint64) dloggerFake { return l }
//go:nosplit
func (l *dloggerImpl) hex(x uint64) *dloggerImpl {
l.w.byte(debugLogHex)
l.w.uvarint(x)
return l
}
//go:nosplit
func (l *dlogger) p(x any) *dlogger {
if !dlogEnabled {
return l
}
func (l dloggerFake) p(x any) dloggerFake { return l }
//go:nosplit
func (l *dloggerImpl) p(x any) *dloggerImpl {
l.w.byte(debugLogPtr)
if x == nil {
l.w.uvarint(0)
@@ -289,11 +337,10 @@ func (l *dlogger) p(x any) *dlogger {
}
//go:nosplit
func (l *dlogger) s(x string) *dlogger {
if !dlogEnabled {
return l
}
func (l dloggerFake) s(x string) dloggerFake { return l }
//go:nosplit
func (l *dloggerImpl) s(x string) *dloggerImpl {
strData := unsafe.StringData(x)
datap := &firstmoduledata
if len(x) > 4 && datap.etext <= uintptr(unsafe.Pointer(strData)) && uintptr(unsafe.Pointer(strData)) < datap.end {
@@ -325,20 +372,20 @@ func (l *dlogger) s(x string) *dlogger {
}
//go:nosplit
func (l *dlogger) pc(x uintptr) *dlogger {
if !dlogEnabled {
return l
}
func (l dloggerFake) pc(x uintptr) dloggerFake { return l }
//go:nosplit
func (l *dloggerImpl) pc(x uintptr) *dloggerImpl {
l.w.byte(debugLogPC)
l.w.uvarint(uint64(x))
return l
}
//go:nosplit
func (l *dlogger) traceback(x []uintptr) *dlogger {
if !dlogEnabled {
return l
}
func (l dloggerFake) traceback(x []uintptr) dloggerFake { return l }
//go:nosplit
func (l *dloggerImpl) traceback(x []uintptr) *dloggerImpl {
l.w.byte(debugLogTraceback)
l.w.uvarint(uint64(len(x)))
for _, pc := range x {
@@ -693,10 +740,12 @@ func (r *debugLogReader) printVal() bool {
// printDebugLog prints the debug log.
func printDebugLog() {
if !dlogEnabled {
return
if dlogEnabled {
printDebugLogImpl()
}
}
func printDebugLogImpl() {
// This function should not panic or throw since it is used in
// the fatal panic path and this may deadlock.
@@ -704,7 +753,7 @@ func printDebugLog() {
// Get the list of all debug logs.
allp := (*uintptr)(unsafe.Pointer(&allDloggers))
all := (*dlogger)(unsafe.Pointer(atomic.Loaduintptr(allp)))
all := (*dloggerImpl)(unsafe.Pointer(atomic.Loaduintptr(allp)))
// Count the logs.
n := 0

View File

@@ -8,12 +8,18 @@ package runtime
const dlogEnabled = false
type dlogger = dloggerFake
func dlog1() dloggerFake {
return dlogFake()
}
type dlogPerM struct{}
func getCachedDlogger() *dlogger {
func getCachedDlogger() *dloggerImpl {
return nil
}
func putCachedDlogger(l *dlogger) bool {
func putCachedDlogger(l *dloggerImpl) bool {
return false
}

View File

@@ -8,21 +8,32 @@ package runtime
const dlogEnabled = true
// dlogger is the underlying implementation of the dlogger interface, selected
// at build time.
//
// We use a type alias instead of struct embedding so that the dlogger type is
// identical to the type returned by method chaining on the methods of this type.
type dlogger = *dloggerImpl
func dlog1() *dloggerImpl {
return dlogImpl()
}
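The alias-versus-embedding rationale above can be seen in a standalone sketch (hypothetical names, not runtime code): because logger below is a type alias for *loggerImpl, every chained method returns a value whose type is the public name itself, which is exactly what the comment requires of dlogger.

package main

import "fmt"

type loggerImpl struct{}

func (l *loggerImpl) i(x int) *loggerImpl { fmt.Print(x, " "); return l }
func (l *loggerImpl) end()                { fmt.Println("end") }

// logger is an alias: logger and *loggerImpl are the same type, so a
// chain such as l.i(1).i(2) still has type logger at every step.
type logger = *loggerImpl

func main() {
	var l logger = &loggerImpl{}
	l.i(1).i(2).end() // prints: 1 2 end
}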
// dlogPerM is the per-M debug log data. This is embedded in the m
// struct.
type dlogPerM struct {
dlogCache *dlogger
dlogCache *dloggerImpl
}
// getCachedDlogger returns a cached dlogger if it can do so
// efficiently, or nil otherwise. The returned dlogger will be owned.
func getCachedDlogger() *dlogger {
func getCachedDlogger() *dloggerImpl {
mp := acquirem()
// We don't return a cached dlogger if we're running on the
// signal stack in case the signal arrived while in
// get/putCachedDlogger. (Too bad we don't have non-atomic
// exchange!)
var l *dlogger
var l *dloggerImpl
if getg() != mp.gsignal {
l = mp.dlogCache
mp.dlogCache = nil
@@ -33,7 +44,7 @@ func getCachedDlogger() *dlogger {
// putCachedDlogger attempts to return l to the local cache. It
// returns false if this fails.
func putCachedDlogger(l *dlogger) bool {
func putCachedDlogger(l *dloggerImpl) bool {
mp := acquirem()
if getg() != mp.gsignal && mp.dlogCache == nil {
mp.dlogCache = l

View File

@@ -24,18 +24,16 @@ package runtime_test
import (
"fmt"
"internal/testenv"
"regexp"
"runtime"
"strings"
"sync"
"sync/atomic"
"testing"
)
func skipDebugLog(t *testing.T) {
if !runtime.DlogEnabled {
t.Skip("debug log disabled (rebuild with -tags debuglog)")
if runtime.DlogEnabled {
t.Skip("debug log tests disabled to avoid collisions with real debug logs")
}
}
@@ -83,28 +81,63 @@ func TestDebugLogSym(t *testing.T) {
func TestDebugLogInterleaving(t *testing.T) {
skipDebugLog(t)
runtime.ResetDebugLog()
var wg sync.WaitGroup
done := int32(0)
wg.Add(1)
go func() {
// Encourage main goroutine to move around to
// different Ms and Ps.
for atomic.LoadInt32(&done) == 0 {
runtime.Gosched()
}
wg.Done()
}()
var want strings.Builder
for i := 0; i < 1000; i++ {
runtime.Dlog().I(i).End()
fmt.Fprintf(&want, "[] %d\n", i)
runtime.Gosched()
}
atomic.StoreInt32(&done, 1)
wg.Wait()
n1 := runtime.CountDebugLog()
t.Logf("number of log shards at start: %d", n1)
const limit = 1000
const concurrency = 10
// Start several goroutines writing to the log simultaneously.
var wg sync.WaitGroup
i := 0
chans := make([]chan bool, concurrency)
for gid := range concurrency {
chans[gid] = make(chan bool)
wg.Add(1)
go func() {
defer wg.Done()
var log *runtime.Dlogger
for {
<-chans[gid]
if log != nil {
log.End()
}
next := chans[(gid+1)%len(chans)]
if i >= limit {
close(next)
break
}
// Log an entry, but *don't* release the log shard until it's our
// turn again. This should result in at least n=concurrency log
// shards.
log = runtime.Dlog().I(i)
i++
// Wake up the next logger goroutine.
next <- true
}
}()
}
// Start the chain reaction.
chans[0] <- true
// Wait for them to finish and get the log.
wg.Wait()
gotFull := runtime.DumpDebugLog()
got := dlogCanonicalize(gotFull)
n2 := runtime.CountDebugLog()
t.Logf("number of log shards at end: %d", n2)
if n2 < concurrency {
t.Errorf("created %d log shards, expected >= %d", n2, concurrency)
}
// Construct the desired output.
var want strings.Builder
for i := 0; i < limit; i++ {
fmt.Fprintf(&want, "[] %d\n", i)
}
if got != want.String() {
// Since the timestamps are useful in understanding
// failures of this test, we print the uncanonicalized
@@ -156,14 +189,3 @@ func TestDebugLogLongString(t *testing.T) {
t.Fatalf("want %q, got %q", want, got)
}
}
// TestDebugLogBuild verifies that the runtime builds with -tags=debuglog.
func TestDebugLogBuild(t *testing.T) {
testenv.MustHaveGoBuild(t)
// It doesn't matter which program we build, anything will rebuild the
// runtime.
if _, err := buildTestProg(t, "testprog", "-tags=debuglog"); err != nil {
t.Fatal(err)
}
}

View File

@@ -5,8 +5,8 @@
package runtime_test
import (
"reflect"
"runtime"
"slices"
"testing"
)
@@ -83,7 +83,7 @@ func TestConditionalDefers(t *testing.T) {
t.Fatal("expected panic")
}
want := []int{4, 2, 1}
if !reflect.DeepEqual(want, list) {
if !slices.Equal(want, list) {
t.Fatalf("wanted %v, got %v", want, list)
}

View File

@@ -184,6 +184,7 @@ type sigcontext struct {
sc_pc uint64
sc_regs [32]uint64
sc_flags uint32
sc_pad0 [1]uint32
sc_extcontext [0]uint64
}

View File

@@ -7,6 +7,7 @@ package runtime
import (
"internal/abi"
"internal/bytealg"
"internal/runtime/sys"
)
// The Error interface identifies a run time error.
@@ -329,7 +330,7 @@ func printindented(s string) {
//
// It is called from the generated wrapper code.
func panicwrap() {
pc := getcallerpc()
pc := sys.GetCallerPC()
name := funcNameForPrint(funcname(findfunc(pc)))
// name is something like "main.(*T).F".
// We want to extract pkg ("main"), typ ("T"), and meth ("F").

View File

@@ -32,15 +32,14 @@ func ExampleFrames() {
for {
frame, more := frames.Next()
// Process this frame.
//
// To keep this example's output stable
// even if there are changes in the testing package,
// stop unwinding when we leave package runtime.
if !strings.Contains(frame.File, "runtime/") {
// Canonicalize function name and skip callers of this function
// for predictable example output.
// You probably don't need this in your own code.
function := strings.ReplaceAll(frame.Function, "main.main", "runtime_test.ExampleFrames")
fmt.Printf("- more:%v | %s\n", more, function)
if function == "runtime_test.ExampleFrames" {
break
}
fmt.Printf("- more:%v | %s\n", more, frame.Function)
// Check whether there are more frames to process after this one.
if !more {

View File

@@ -0,0 +1,227 @@
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build loong64 && linux
package runtime
import (
"internal/abi"
"internal/goarch"
"unsafe"
)
type sigContext struct {
savedRegs sigcontext
}
func sigctxtSetContextRegister(ctxt *sigctxt, x uint64) {
ctxt.regs().sc_regs[29] = x
}
func sigctxtAtTrapInstruction(ctxt *sigctxt) bool {
return *(*uint32)(unsafe.Pointer(ctxt.sigpc())) == 0x002a0000 // BREAK 0
}
func sigctxtStatus(ctxt *sigctxt) uint64 {
return ctxt.r19()
}
func (h *debugCallHandler) saveSigContext(ctxt *sigctxt) {
sp := ctxt.sp()
sp -= goarch.PtrSize
ctxt.set_sp(sp)
*(*uint64)(unsafe.Pointer(uintptr(sp))) = ctxt.link() // save the current lr
ctxt.set_link(ctxt.pc()) // set new lr to the current pc
// Write the argument frame size.
*(*uintptr)(unsafe.Pointer(uintptr(sp - 8))) = h.argSize
// Save current registers.
h.sigCtxt.savedRegs = *ctxt.regs()
}
// case 0
func (h *debugCallHandler) debugCallRun(ctxt *sigctxt) {
sp := ctxt.sp()
memmove(unsafe.Pointer(uintptr(sp)+8), h.argp, h.argSize)
if h.regArgs != nil {
storeRegArgs(ctxt.regs(), h.regArgs)
}
// Push return PC, which should be the signal PC+4, because
// the signal PC is the PC of the trap instruction itself.
ctxt.set_link(ctxt.pc() + 4)
// Set PC to call and context register.
ctxt.set_pc(uint64(h.fv.fn))
sigctxtSetContextRegister(ctxt, uint64(uintptr(unsafe.Pointer(h.fv))))
}
// case 1
func (h *debugCallHandler) debugCallReturn(ctxt *sigctxt) {
sp := ctxt.sp()
memmove(h.argp, unsafe.Pointer(uintptr(sp)+8), h.argSize)
if h.regArgs != nil {
loadRegArgs(h.regArgs, ctxt.regs())
}
// Restore the old lr from *sp
olr := *(*uint64)(unsafe.Pointer(uintptr(sp)))
ctxt.set_link(olr)
pc := ctxt.pc()
ctxt.set_pc(pc + 4) // step to next instruction
}
// case 2
func (h *debugCallHandler) debugCallPanicOut(ctxt *sigctxt) {
sp := ctxt.sp()
memmove(unsafe.Pointer(&h.panic), unsafe.Pointer(uintptr(sp)+8), 2*goarch.PtrSize)
ctxt.set_pc(ctxt.pc() + 4)
}
// case 8
func (h *debugCallHandler) debugCallUnsafe(ctxt *sigctxt) {
sp := ctxt.sp()
reason := *(*string)(unsafe.Pointer(uintptr(sp) + 8))
h.err = plainError(reason)
ctxt.set_pc(ctxt.pc() + 4)
}
// case 16
func (h *debugCallHandler) restoreSigContext(ctxt *sigctxt) {
// Restore all registers except for pc and sp
pc, sp := ctxt.pc(), ctxt.sp()
*ctxt.regs() = h.sigCtxt.savedRegs
ctxt.set_pc(pc + 4)
ctxt.set_sp(sp)
}
func getVal32(base uintptr, off uintptr) uint32 {
return *(*uint32)(unsafe.Pointer(base + off))
}
func getVal64(base uintptr, off uintptr) uint64 {
return *(*uint64)(unsafe.Pointer(base + off))
}
func setVal64(base uintptr, off uintptr, val uint64) {
*(*uint64)(unsafe.Pointer(base + off)) = val
}
// Layout for sigcontext on linux/loong64: arch/loongarch/include/uapi/asm/sigcontext.h
//
// sc_extcontext | sctx_info
// ------------------------------------------
// | {fpu,lsx,lasx}_context
// ---------------------------
// | sctx_info
// ---------------------------
// | lbt_context
//
const (
INVALID_MAGIC uint32 = 0
FPU_CTX_MAGIC = 0x46505501
LSX_CTX_MAGIC = 0x53580001
LASX_CTX_MAGIC = 0x41535801
LBT_CTX_MAGIC = 0x42540001
)
const (
SCTX_INFO_SIZE = 4 + 4 + 8
FPU_CTX_SIZE = 8*32 + 8 + 4 // fpu context size
LSX_CTX_SIZE = 8*64 + 8 + 4 // lsx context size
LASX_CTX_SIZE = 8*128 + 8 + 4 // lasx context size
LBT_CTX_SIZE = 8*4 + 4 + 4 // lbt context size
)
// storeRegArgs sets up argument registers in the signal context state
// from an abi.RegArgs.
//
// Both src and dst must be non-nil.
func storeRegArgs(dst *sigcontext, src *abi.RegArgs) {
// R4..R19 are used to pass int arguments in registers on loong64
for i := 0; i < abi.IntArgRegs; i++ {
dst.sc_regs[i+4] = (uint64)(src.Ints[i])
}
// F0..F15 are used to pass float arguments in registers on loong64
offset := (uintptr)(0)
baseAddr := (uintptr)(unsafe.Pointer(&dst.sc_extcontext))
for {
magic := getVal32(baseAddr, offset)
size := getVal32(baseAddr, offset+4)
switch magic {
case INVALID_MAGIC:
return
case FPU_CTX_MAGIC:
offset += SCTX_INFO_SIZE
for i := 0; i < abi.FloatArgRegs; i++ {
setVal64(baseAddr, ((uintptr)(i*8) + offset), src.Floats[i])
}
return
case LSX_CTX_MAGIC:
offset += SCTX_INFO_SIZE
for i := 0; i < abi.FloatArgRegs; i++ {
setVal64(baseAddr, ((uintptr)(i*16) + offset), src.Floats[i])
}
return
case LASX_CTX_MAGIC:
offset += SCTX_INFO_SIZE
for i := 0; i < abi.FloatArgRegs; i++ {
setVal64(baseAddr, ((uintptr)(i*32) + offset), src.Floats[i])
}
return
case LBT_CTX_MAGIC:
offset += uintptr(size)
}
}
}
func loadRegArgs(dst *abi.RegArgs, src *sigcontext) {
// R4..R19 are used to pass int arguments in registers on loong64
for i := 0; i < abi.IntArgRegs; i++ {
dst.Ints[i] = uintptr(src.sc_regs[i+4])
}
// F0..F15 are used to pass float arguments in registers on loong64
offset := (uintptr)(0)
baseAddr := (uintptr)(unsafe.Pointer(&src.sc_extcontext))
for {
magic := getVal32(baseAddr, offset)
size := getVal32(baseAddr, (offset + 4))
switch magic {
case INVALID_MAGIC:
return
case FPU_CTX_MAGIC:
offset += SCTX_INFO_SIZE
for i := 0; i < abi.FloatArgRegs; i++ {
dst.Floats[i] = getVal64(baseAddr, (uintptr(i*8) + offset))
}
return
case LSX_CTX_MAGIC:
offset += SCTX_INFO_SIZE
for i := 0; i < abi.FloatArgRegs; i++ {
dst.Floats[i] = getVal64(baseAddr, (uintptr(i*16) + offset))
}
return
case LASX_CTX_MAGIC:
offset += SCTX_INFO_SIZE
for i := 0; i < abi.FloatArgRegs; i++ {
dst.Floats[i] = getVal64(baseAddr, (uintptr(i*32) + offset))
}
return
case LBT_CTX_MAGIC:
offset += uintptr(size)
}
}
}

View File

@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build (amd64 || arm64 || ppc64le) && linux
//go:build (amd64 || arm64 || loong64 || ppc64le) && linux
package runtime

View File

@@ -12,22 +12,26 @@ const DebugLogBytes = debugLogBytes
const DebugLogStringLimit = debugLogStringLimit
var Dlog = dlog
type Dlogger = dloggerImpl
func (l *dlogger) End() { l.end() }
func (l *dlogger) B(x bool) *dlogger { return l.b(x) }
func (l *dlogger) I(x int) *dlogger { return l.i(x) }
func (l *dlogger) I16(x int16) *dlogger { return l.i16(x) }
func (l *dlogger) U64(x uint64) *dlogger { return l.u64(x) }
func (l *dlogger) Hex(x uint64) *dlogger { return l.hex(x) }
func (l *dlogger) P(x any) *dlogger { return l.p(x) }
func (l *dlogger) S(x string) *dlogger { return l.s(x) }
func (l *dlogger) PC(x uintptr) *dlogger { return l.pc(x) }
func Dlog() *Dlogger {
return dlogImpl()
}
func (l *dloggerImpl) End() { l.end() }
func (l *dloggerImpl) B(x bool) *dloggerImpl { return l.b(x) }
func (l *dloggerImpl) I(x int) *dloggerImpl { return l.i(x) }
func (l *dloggerImpl) I16(x int16) *dloggerImpl { return l.i16(x) }
func (l *dloggerImpl) U64(x uint64) *dloggerImpl { return l.u64(x) }
func (l *dloggerImpl) Hex(x uint64) *dloggerImpl { return l.hex(x) }
func (l *dloggerImpl) P(x any) *dloggerImpl { return l.p(x) }
func (l *dloggerImpl) S(x string) *dloggerImpl { return l.s(x) }
func (l *dloggerImpl) PC(x uintptr) *dloggerImpl { return l.pc(x) }
func DumpDebugLog() string {
gp := getg()
gp.writebuf = make([]byte, 0, 1<<20)
printDebugLog()
printDebugLogImpl()
buf := gp.writebuf
gp.writebuf = nil
@@ -44,3 +48,13 @@ func ResetDebugLog() {
}
startTheWorld(stw)
}
func CountDebugLog() int {
stw := stopTheWorld(stwForTestResetDebugLog)
i := 0
for l := allDloggers; l != nil; l = l.allLink {
i++
}
startTheWorld(stw)
return i
}

View File

@@ -0,0 +1,64 @@
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build !goexperiment.swissmap
package runtime
import (
"internal/abi"
"unsafe"
)
const RuntimeHmapSize = unsafe.Sizeof(hmap{})
func OverLoadFactor(count int, B uint8) bool {
return overLoadFactor(count, B)
}
func MapBucketsCount(m map[int]int) int {
h := *(**hmap)(unsafe.Pointer(&m))
return 1 << h.B
}
func MapBucketsPointerIsNil(m map[int]int) bool {
h := *(**hmap)(unsafe.Pointer(&m))
return h.buckets == nil
}
func MapTombstoneCheck(m map[int]int) {
// Make sure emptyOne and emptyRest are distributed correctly.
// We should have a series of filled and emptyOne cells, followed by
// a series of emptyRest cells.
h := *(**hmap)(unsafe.Pointer(&m))
i := any(m)
t := *(**maptype)(unsafe.Pointer(&i))
for x := 0; x < 1<<h.B; x++ {
b0 := (*bmap)(add(h.buckets, uintptr(x)*uintptr(t.BucketSize)))
n := 0
for b := b0; b != nil; b = b.overflow(t) {
for i := 0; i < abi.OldMapBucketCount; i++ {
if b.tophash[i] != emptyRest {
n++
}
}
}
k := 0
for b := b0; b != nil; b = b.overflow(t) {
for i := 0; i < abi.OldMapBucketCount; i++ {
if k < n && b.tophash[i] == emptyRest {
panic("early emptyRest")
}
if k >= n && b.tophash[i] != emptyRest {
panic("late non-emptyRest")
}
if k == n-1 && b.tophash[i] == emptyOne {
panic("last non-emptyRest entry is emptyOne")
}
k++
}
}
}
}

View File

@@ -0,0 +1,11 @@
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.swissmap
package runtime
func MapTombstoneCheck(m map[int]int) {
// TODO
}

View File

@@ -11,7 +11,7 @@ import (
"internal/goarch"
"internal/goos"
"internal/runtime/atomic"
"runtime/internal/sys"
"internal/runtime/sys"
"unsafe"
)
@@ -94,9 +94,9 @@ func Netpoll(delta int64) {
})
}
func GCMask(x any) (ret []byte) {
func PointerMask(x any) (ret []byte) {
systemstack(func() {
ret = getgcmask(x)
ret = pointerMask(x)
})
return
}
@@ -481,22 +481,6 @@ func (rw *RWMutex) Unlock() {
rw.rw.unlock()
}
const RuntimeHmapSize = unsafe.Sizeof(hmap{})
func MapBucketsCount(m map[int]int) int {
h := *(**hmap)(unsafe.Pointer(&m))
return 1 << h.B
}
func MapBucketsPointerIsNil(m map[int]int) bool {
h := *(**hmap)(unsafe.Pointer(&m))
return h.buckets == nil
}
func OverLoadFactor(count int, B uint8) bool {
return overLoadFactor(count, B)
}
func LockOSCounts() (external, internal uint32) {
gp := getg()
if gp.m.lockedExt+gp.m.lockedInt == 0 {
@@ -514,7 +498,7 @@ func LockOSCounts() (external, internal uint32) {
//go:noinline
func TracebackSystemstack(stk []uintptr, i int) int {
if i == 0 {
pc, sp := getcallerpc(), getcallersp()
pc, sp := sys.GetCallerPC(), sys.GetCallerSP()
var u unwinder
u.initAt(pc, sp, 0, getg(), unwindJumpStack) // Don't ignore errors, for testing
return tracebackPCs(&u, 0, stk)
@@ -597,7 +581,7 @@ func unexportedPanicForTesting(b []byte, i int) byte {
func G0StackOverflow() {
systemstack(func() {
g0 := getg()
sp := getcallersp()
sp := sys.GetCallerSP()
// The stack bounds for the g0 stack are not always precise.
// Use an artificially small stack, to trigger a stack overflow
// without actually running out of the system stack (which may seg fault).
@@ -614,42 +598,6 @@ func stackOverflow(x *byte) {
stackOverflow(&buf[0])
}
func MapTombstoneCheck(m map[int]int) {
// Make sure emptyOne and emptyRest are distributed correctly.
// We should have a series of filled and emptyOne cells, followed by
// a series of emptyRest cells.
h := *(**hmap)(unsafe.Pointer(&m))
i := any(m)
t := *(**maptype)(unsafe.Pointer(&i))
for x := 0; x < 1<<h.B; x++ {
b0 := (*bmap)(add(h.buckets, uintptr(x)*uintptr(t.BucketSize)))
n := 0
for b := b0; b != nil; b = b.overflow(t) {
for i := 0; i < abi.MapBucketCount; i++ {
if b.tophash[i] != emptyRest {
n++
}
}
}
k := 0
for b := b0; b != nil; b = b.overflow(t) {
for i := 0; i < abi.MapBucketCount; i++ {
if k < n && b.tophash[i] == emptyRest {
panic("early emptyRest")
}
if k >= n && b.tophash[i] != emptyRest {
panic("late non-emptyRest")
}
if k == n-1 && b.tophash[i] == emptyOne {
panic("last non-emptyRest entry is emptyOne")
}
k++
}
}
}
}
func RunGetgThreadSwitchTest() {
// Test that getg works correctly with thread switch.
// With gccgo, if we generate getg inlined, the backend
@@ -1223,6 +1171,9 @@ func PageCachePagesLeaked() (leaked uintptr) {
return
}
var ProcYield = procyield
var OSYield = osyield
type Mutex = mutex
var Lock = lock
@@ -1313,7 +1264,7 @@ const (
type TimeHistogram timeHistogram
// Counts returns the counts for the given bucket, subBucket indices.
// Count returns the counts for the given bucket, subBucket indices.
// Returns true if the bucket was valid, otherwise returns the counts
// for the overflow bucket if bucket > 0 or the underflow bucket if
// bucket < 0, and false.
@@ -1913,3 +1864,18 @@ func GCMarkDoneResetRestartFlag() {
gcDebugMarkDone.restartedDueTo27993 = false
releasem(mp)
}
type BitCursor struct {
b bitCursor
}
func NewBitCursor(buf *byte) BitCursor {
return BitCursor{b: bitCursor{ptr: buf, n: 0}}
}
func (b BitCursor) Write(data *byte, cnt uintptr) {
b.b.write(data, cnt)
}
func (b BitCursor) Offset(cnt uintptr) BitCursor {
return BitCursor{b: b.b.offset(cnt)}
}

View File

@@ -6,7 +6,10 @@
package runtime
import "unsafe"
import (
"internal/runtime/sys"
"unsafe"
)
const MaxArgs = maxArgs
@@ -31,8 +34,8 @@ func (c ContextStub) GetPC() uintptr {
func NewContextStub() *ContextStub {
var ctx context
ctx.set_ip(getcallerpc())
ctx.set_sp(getcallersp())
ctx.set_ip(sys.GetCallerPC())
ctx.set_sp(sys.GetCallerSP())
ctx.set_fp(getcallerfp())
return &ContextStub{ctx}
}

View File

@@ -294,10 +294,11 @@ import (
// Caller reports file and line number information about function invocations on
// the calling goroutine's stack. The argument skip is the number of stack frames
// to ascend, with 0 identifying the caller of Caller. (For historical reasons the
// meaning of skip differs between Caller and [Callers].) The return values report the
// program counter, file name, and line number within the file of the corresponding
// call. The boolean ok is false if it was not possible to recover the information.
// to ascend, with 0 identifying the caller of Caller. (For historical reasons the
// meaning of skip differs between Caller and [Callers].) The return values report
// the program counter, the file name (using forward slashes as path separator, even
// on Windows), and the line number within the file of the corresponding call.
// The boolean ok is false if it was not possible to recover the information.
func Caller(skip int) (pc uintptr, file string, line int, ok bool) {
rpc := make([]uintptr, 1)
n := callers(skip+1, rpc)
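A small usage sketch of the Caller contract documented above (skip = 0 names the caller of Caller itself; the reported file path uses forward slashes even on Windows); names and paths here are illustrative only:

package main

import (
	"fmt"
	"runtime"
)

// where reports the function, file, and line of its own caller,
// so it passes skip = 1 (skip = 0 would report where itself).
func where() string {
	pc, file, line, ok := runtime.Caller(1)
	if !ok {
		return "unknown"
	}
	return fmt.Sprintf("%s at %s:%d", runtime.FuncForPC(pc).Name(), file, line)
}

func main() {
	fmt.Println(where()) // e.g. main.main at /home/user/caller.go:20
}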
@@ -336,6 +337,11 @@ var defaultGOROOT string // set by cmd/link
// GOROOT returns the root of the Go tree. It uses the
// GOROOT environment variable, if set at process start,
// or else the root used during the Go build.
//
// Deprecated: The root used during the Go build will not be
// meaningful if the binary is copied to another machine.
// Use the system path to locate the “go” binary, and use
// “go env GOROOT” to find its GOROOT.
func GOROOT() string {
s := gogetenv("GOROOT")
if s != "" {
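The deprecation note added above recommends asking the go tool itself for its GOROOT; a minimal sketch of that, assuming the go binary is on the system path:

package main

import (
	"fmt"
	"os/exec"
	"strings"
)

func main() {
	// "go env GOROOT" reports the root of the toolchain actually on PATH,
	// which stays correct even if this binary was built elsewhere.
	out, err := exec.Command("go", "env", "GOROOT").Output()
	if err != nil {
		fmt.Println("go binary not found on PATH:", err)
		return
	}
	fmt.Println(strings.TrimSpace(string(out)))
}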

View File

@@ -6,8 +6,8 @@ package runtime_test
import (
"fmt"
"internal/asan"
"internal/testenv"
"internal/weak"
"math/bits"
"math/rand"
"os"
@@ -21,6 +21,7 @@ import (
"testing"
"time"
"unsafe"
"weak"
)
func TestGcSys(t *testing.T) {
@@ -210,6 +211,9 @@ func TestGcZombieReporting(t *testing.T) {
}
func TestGCTestMoveStackOnNextCall(t *testing.T) {
if asan.Enabled {
t.Skip("extra allocations with -asan causes this to fail; see #70079")
}
t.Parallel()
var onStack int
// GCTestMoveStackOnNextCall can fail in rare cases if there's
@@ -300,6 +304,9 @@ var pointerClassBSS *int
var pointerClassData = 42
func TestGCTestPointerClass(t *testing.T) {
if asan.Enabled {
t.Skip("extra allocations cause this test to fail; see #70079")
}
t.Parallel()
check := func(p unsafe.Pointer, want string) {
t.Helper()
@@ -736,7 +743,7 @@ func BenchmarkMSpanCountAlloc(b *testing.B) {
// always rounded up 8 bytes.
for _, n := range []int{8, 16, 32, 64, 128} {
b.Run(fmt.Sprintf("bits=%d", n*8), func(b *testing.B) {
// Initialize a new byte slice with pseduo-random data.
// Initialize a new byte slice with pseudo-random data.
bits := make([]byte, n)
rand.Read(bits)
@@ -819,7 +826,7 @@ func TestWeakToStrongMarkTermination(t *testing.T) {
// Start a GC, and wait a little bit to get something spinning in mark termination.
// Simultaneously, fire off another goroutine to disable spinning. If everything's
// working correctly, then weak.Strong will block, so we need to make sure something
// working correctly, then weak.Value will block, so we need to make sure something
// prevents the GC from continuing to spin.
done := make(chan struct{})
go func() {
@@ -827,7 +834,11 @@ func TestWeakToStrongMarkTermination(t *testing.T) {
done <- struct{}{}
}()
go func() {
time.Sleep(100 * time.Millisecond)
// Usleep here instead of time.Sleep. time.Sleep
// can allocate, and if we get unlucky, then it
// can end up stuck in gcMarkDone with nothing to
// wake it.
runtime.Usleep(100000) // 100ms
// Let mark termination continue.
runtime.SetSpinInGCMarkDone(false)
@@ -840,7 +851,7 @@ func TestWeakToStrongMarkTermination(t *testing.T) {
wg.Add(1)
go func() {
defer wg.Done()
wp.Strong()
wp.Value()
}()
}
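The wp.Value call above reflects the move from internal/weak to the public weak package in this release; a minimal standalone sketch of that API (weak.Make and Pointer.Value), separate from the test:

package main

import (
	"fmt"
	"runtime"
	"weak"
)

func main() {
	v := new(int)
	*v = 42
	wp := weak.Make(v) // weak pointer; does not keep v alive by itself
	if p := wp.Value(); p != nil {
		fmt.Println(*p) // 42, since v is still reachable here
	}
	runtime.KeepAlive(v) // keep v reachable through the Value call above
}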

View File

@@ -90,7 +90,7 @@ func TestGCInfo(t *testing.T) {
}
func verifyGCInfo(t *testing.T, name string, p any, mask0 []byte) {
mask := runtime.GCMask(p)
mask := runtime.PointerMask(p)
if bytes.HasPrefix(mask, mask0) {
// Just the prefix matching is OK.
//

View File

@@ -10,7 +10,7 @@
package runtime
import (
"runtime/internal/math"
"internal/runtime/math"
"unsafe"
)

View File

@@ -205,7 +205,7 @@ func dumptype(t *_type) {
dwritebyte('.')
dwrite(unsafe.Pointer(unsafe.StringData(name)), uintptr(len(name)))
}
dumpbool(t.Kind_&abi.KindDirectIface == 0 || t.PtrBytes != 0)
dumpbool(t.Kind_&abi.KindDirectIface == 0 || t.Pointers())
}
// dump an object.

View File

@@ -6,7 +6,7 @@ package runtime
import (
"internal/runtime/atomic"
"runtime/internal/sys"
"internal/runtime/sys"
"unsafe"
)

View File

@@ -8,7 +8,7 @@ import (
"internal/abi"
"internal/goarch"
"internal/runtime/atomic"
"runtime/internal/sys"
"internal/runtime/sys"
"unsafe"
)
@@ -333,7 +333,7 @@ var (
// be used as the second word of an interface value.
func convT(t *_type, v unsafe.Pointer) unsafe.Pointer {
if raceenabled {
raceReadObjectPC(t, v, getcallerpc(), abi.FuncPCABIInternal(convT))
raceReadObjectPC(t, v, sys.GetCallerPC(), abi.FuncPCABIInternal(convT))
}
if msanenabled {
msanread(v, t.Size_)
@@ -348,7 +348,7 @@ func convT(t *_type, v unsafe.Pointer) unsafe.Pointer {
func convTnoptr(t *_type, v unsafe.Pointer) unsafe.Pointer {
// TODO: maybe take size instead of type?
if raceenabled {
raceReadObjectPC(t, v, getcallerpc(), abi.FuncPCABIInternal(convTnoptr))
raceReadObjectPC(t, v, sys.GetCallerPC(), abi.FuncPCABIInternal(convTnoptr))
}
if msanenabled {
msanread(v, t.Size_)
@@ -692,39 +692,16 @@ func iterate_itabs(fn func(*itab)) {
}
// staticuint64s is used to avoid allocating in convTx for small integer values.
var staticuint64s = [...]uint64{
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
// staticuint64s[0] == 0, staticuint64s[1] == 1, and so forth.
// It is defined in assembler code so that it is read-only.
var staticuint64s [256]uint64
// getStaticuint64s is called by the reflect package to get a pointer
// to the read-only array.
//
//go:linkname getStaticuint64s
func getStaticuint64s() *[256]uint64 {
return &staticuint64s
}
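A quick standalone way to observe the effect of this table (a sketch, not part of the change, and assuming the compiler's small-integer boxing optimization applies): converting a byte-sized value to an interface reuses a pointer into staticuint64s, so it should report zero allocations.

package main

import (
	"fmt"
	"testing"
)

func main() {
	var sink any
	var x uint8
	allocs := testing.AllocsPerRun(1000, func() {
		x++
		sink = x // interface data word points into the 256-entry static table
	})
	_ = sink
	fmt.Println("allocations per boxing:", allocs) // expected: 0
}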
// The linker redirects a reference of a method that it determined

View File

@@ -1,55 +0,0 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package math
import "internal/goarch"
const MaxUintptr = ^uintptr(0)
// MulUintptr returns a * b and whether the multiplication overflowed.
// On supported platforms this is an intrinsic lowered by the compiler.
func MulUintptr(a, b uintptr) (uintptr, bool) {
if a|b < 1<<(4*goarch.PtrSize) || a == 0 {
return a * b, false
}
overflow := b > MaxUintptr/a
return a * b, overflow
}
// Mul64 returns the 128-bit product of x and y: (hi, lo) = x * y
// with the product bits' upper half returned in hi and the lower
// half returned in lo.
// This is a copy from math/bits.Mul64
// On supported platforms this is an intrinsic lowered by the compiler.
func Mul64(x, y uint64) (hi, lo uint64) {
const mask32 = 1<<32 - 1
x0 := x & mask32
x1 := x >> 32
y0 := y & mask32
y1 := y >> 32
w0 := x0 * y0
t := x1*y0 + w0>>32
w1 := t & mask32
w2 := t >> 32
w1 += x0 * y1
hi = x1*y1 + w2 + w1>>32
lo = x * y
return
}
// Add64 returns the sum with carry of x, y and carry: sum = x + y + carry.
// The carry input must be 0 or 1; otherwise the behavior is undefined.
// The carryOut output is guaranteed to be 0 or 1.
//
// This function's execution time does not depend on the inputs.
// On supported platforms this is an intrinsic lowered by the compiler.
func Add64(x, y, carry uint64) (sum, carryOut uint64) {
sum = x + y + carry
// The sum will overflow if both top bits are set (x & y) or if one of them
// is (x | y), and a carry from the lower place happened. If such a carry
// happens, the top bit will be 1 + 0 + 1 = 0 (&^ sum).
carryOut = ((x & y) | ((x | y) &^ sum)) >> 63
return
}

View File

@@ -1,79 +0,0 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package math_test
import (
. "runtime/internal/math"
"testing"
)
const (
UintptrSize = 32 << (^uintptr(0) >> 63)
)
type mulUintptrTest struct {
a uintptr
b uintptr
overflow bool
}
var mulUintptrTests = []mulUintptrTest{
{0, 0, false},
{1000, 1000, false},
{MaxUintptr, 0, false},
{MaxUintptr, 1, false},
{MaxUintptr / 2, 2, false},
{MaxUintptr / 2, 3, true},
{MaxUintptr, 10, true},
{MaxUintptr, 100, true},
{MaxUintptr / 100, 100, false},
{MaxUintptr / 1000, 1001, true},
{1<<(UintptrSize/2) - 1, 1<<(UintptrSize/2) - 1, false},
{1 << (UintptrSize / 2), 1 << (UintptrSize / 2), true},
{MaxUintptr >> 32, MaxUintptr >> 32, false},
{MaxUintptr, MaxUintptr, true},
}
func TestMulUintptr(t *testing.T) {
for _, test := range mulUintptrTests {
a, b := test.a, test.b
for i := 0; i < 2; i++ {
mul, overflow := MulUintptr(a, b)
if mul != a*b || overflow != test.overflow {
t.Errorf("MulUintptr(%v, %v) = %v, %v want %v, %v",
a, b, mul, overflow, a*b, test.overflow)
}
a, b = b, a
}
}
}
var SinkUintptr uintptr
var SinkBool bool
var x, y uintptr
func BenchmarkMulUintptr(b *testing.B) {
x, y = 1, 2
b.Run("small", func(b *testing.B) {
for i := 0; i < b.N; i++ {
var overflow bool
SinkUintptr, overflow = MulUintptr(x, y)
if overflow {
SinkUintptr = 0
}
}
})
x, y = MaxUintptr, MaxUintptr-1
b.Run("large", func(b *testing.B) {
for i := 0; i < b.N; i++ {
var overflow bool
SinkUintptr, overflow = MulUintptr(x, y)
if overflow {
SinkUintptr = 0
}
}
})
}

View File

@@ -1,36 +0,0 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package sys
import (
"internal/goarch"
"internal/goos"
)
// AIX requires a larger stack for syscalls.
// The race build also needs more stack. See issue 54291.
// This arithmetic must match that in cmd/internal/objabi/stack.go:stackGuardMultiplier.
const StackGuardMultiplier = 1 + goos.IsAix + isRace
// DefaultPhysPageSize is the default physical page size.
const DefaultPhysPageSize = goarch.DefaultPhysPageSize
// PCQuantum is the minimal unit for a program counter (1 on x86, 4 on most other systems).
// The various PC tables record PC deltas pre-divided by PCQuantum.
const PCQuantum = goarch.PCQuantum
// Int64Align is the required alignment for a 64-bit integer (4 on 32-bit systems, 8 on 64-bit).
const Int64Align = goarch.PtrSize
// MinFrameSize is the size of the system-reserved words at the bottom
// of a frame (just above the architectural stack pointer).
// It is zero on x86 and PtrSize on most non-x86 (LR-based) systems.
// On PowerPC it is larger, to cover three more reserved words:
// the compiler word, the link editor word, and the TOC save word.
const MinFrameSize = goarch.MinFrameSize
// StackAlign is the required alignment of the SP register.
// The stack must be at least word aligned, but some architectures require more.
const StackAlign = goarch.StackAlign

View File

@@ -1,9 +0,0 @@
// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build !race
package sys
const isRace = 0

View File

@@ -1,9 +0,0 @@
// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build race
package sys
const isRace = 1

View File

@@ -1,208 +0,0 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package sys
// Copied from math/bits to avoid dependence.
var deBruijn32tab = [32]byte{
0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9,
}
const deBruijn32 = 0x077CB531
var deBruijn64tab = [64]byte{
0, 1, 56, 2, 57, 49, 28, 3, 61, 58, 42, 50, 38, 29, 17, 4,
62, 47, 59, 36, 45, 43, 51, 22, 53, 39, 33, 30, 24, 18, 12, 5,
63, 55, 48, 27, 60, 41, 37, 16, 46, 35, 44, 21, 52, 32, 23, 11,
54, 26, 40, 15, 34, 20, 31, 10, 25, 14, 19, 9, 13, 8, 7, 6,
}
const deBruijn64 = 0x03f79d71b4ca8b09
const ntz8tab = "" +
"\x08\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
"\x04\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
"\x05\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
"\x04\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
"\x06\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
"\x04\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
"\x05\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
"\x04\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
"\x07\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
"\x04\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
"\x05\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
"\x04\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
"\x06\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
"\x04\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
"\x05\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
"\x04\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00"
// TrailingZeros32 returns the number of trailing zero bits in x; the result is 32 for x == 0.
func TrailingZeros32(x uint32) int {
if x == 0 {
return 32
}
// see comment in TrailingZeros64
return int(deBruijn32tab[(x&-x)*deBruijn32>>(32-5)])
}
// TrailingZeros64 returns the number of trailing zero bits in x; the result is 64 for x == 0.
func TrailingZeros64(x uint64) int {
if x == 0 {
return 64
}
// If popcount is fast, replace code below with return popcount(^x & (x - 1)).
//
// x & -x leaves only the right-most bit set in the word. Let k be the
// index of that bit. Since only a single bit is set, the value is two
// to the power of k. Multiplying by a power of two is equivalent to
// left shifting, in this case by k bits. The de Bruijn (64 bit) constant
// is such that all six bit, consecutive substrings are distinct.
// Therefore, if we have a left shifted version of this constant we can
// find by how many bits it was shifted by looking at which six bit
// substring ended up at the top of the word.
// (Knuth, volume 4, section 7.3.1)
return int(deBruijn64tab[(x&-x)*deBruijn64>>(64-6)])
}
// TrailingZeros8 returns the number of trailing zero bits in x; the result is 8 for x == 0.
func TrailingZeros8(x uint8) int {
return int(ntz8tab[x])
}
const len8tab = "" +
"\x00\x01\x02\x02\x03\x03\x03\x03\x04\x04\x04\x04\x04\x04\x04\x04" +
"\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05" +
"\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06" +
"\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06" +
"\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07" +
"\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07" +
"\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07" +
"\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07" +
"\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" +
"\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" +
"\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" +
"\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" +
"\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" +
"\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" +
"\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" +
"\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08"
// Len64 returns the minimum number of bits required to represent x; the result is 0 for x == 0.
//
// nosplit because this is used in src/runtime/histogram.go, which may run in sensitive contexts.
//
//go:nosplit
func Len64(x uint64) (n int) {
if x >= 1<<32 {
x >>= 32
n = 32
}
if x >= 1<<16 {
x >>= 16
n += 16
}
if x >= 1<<8 {
x >>= 8
n += 8
}
return n + int(len8tab[x])
}
// --- OnesCount ---
const m0 = 0x5555555555555555 // 01010101 ...
const m1 = 0x3333333333333333 // 00110011 ...
const m2 = 0x0f0f0f0f0f0f0f0f // 00001111 ...
// OnesCount64 returns the number of one bits ("population count") in x.
func OnesCount64(x uint64) int {
// Implementation: Parallel summing of adjacent bits.
// See "Hacker's Delight", Chap. 5: Counting Bits.
// The following pattern shows the general approach:
//
// x = x>>1&(m0&m) + x&(m0&m)
// x = x>>2&(m1&m) + x&(m1&m)
// x = x>>4&(m2&m) + x&(m2&m)
// x = x>>8&(m3&m) + x&(m3&m)
// x = x>>16&(m4&m) + x&(m4&m)
// x = x>>32&(m5&m) + x&(m5&m)
// return int(x)
//
// Masking (& operations) can be left away when there's no
// danger that a field's sum will carry over into the next
// field: Since the result cannot be > 64, 8 bits is enough
// and we can ignore the masks for the shifts by 8 and up.
// Per "Hacker's Delight", the first line can be simplified
// more, but it saves at best one instruction, so we leave
// it alone for clarity.
const m = 1<<64 - 1
x = x>>1&(m0&m) + x&(m0&m)
x = x>>2&(m1&m) + x&(m1&m)
x = (x>>4 + x) & (m2 & m)
x += x >> 8
x += x >> 16
x += x >> 32
return int(x) & (1<<7 - 1)
}
// LeadingZeros64 returns the number of leading zero bits in x; the result is 64 for x == 0.
func LeadingZeros64(x uint64) int { return 64 - Len64(x) }
// LeadingZeros8 returns the number of leading zero bits in x; the result is 8 for x == 0.
func LeadingZeros8(x uint8) int { return 8 - Len8(x) }
// Len8 returns the minimum number of bits required to represent x; the result is 0 for x == 0.
func Len8(x uint8) int {
return int(len8tab[x])
}
// Bswap64 returns its input with byte order reversed
// 0x0102030405060708 -> 0x0807060504030201
func Bswap64(x uint64) uint64 {
c8 := uint64(0x00ff00ff00ff00ff)
a := x >> 8 & c8
b := (x & c8) << 8
x = a | b
c16 := uint64(0x0000ffff0000ffff)
a = x >> 16 & c16
b = (x & c16) << 16
x = a | b
c32 := uint64(0x00000000ffffffff)
a = x >> 32 & c32
b = (x & c32) << 32
x = a | b
return x
}
// Bswap32 returns its input with byte order reversed
// 0x01020304 -> 0x04030201
func Bswap32(x uint32) uint32 {
c8 := uint32(0x00ff00ff)
a := x >> 8 & c8
b := (x & c8) << 8
x = a | b
c16 := uint32(0x0000ffff)
a = x >> 16 & c16
b = (x & c16) << 16
x = a | b
return x
}
// Prefetch prefetches data from memory addr to cache
//
// AMD64: Produce PREFETCHT0 instruction
//
// ARM64: Produce PRFM instruction with PLDL1KEEP option
func Prefetch(addr uintptr) {}
// PrefetchStreamed prefetches data from memory addr, with a hint that this data is being streamed.
// That is, it is likely to be accessed very soon, but only once. If possible, this will avoid polluting the cache.
//
// AMD64: Produce PREFETCHNTA instruction
//
// ARM64: Produce PRFM instruction with PLDL1STRM option
func PrefetchStreamed(addr uintptr) {}

View File

@@ -1,42 +0,0 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package sys_test
import (
"runtime/internal/sys"
"testing"
)
func TestTrailingZeros64(t *testing.T) {
for i := 0; i <= 64; i++ {
x := uint64(5) << uint(i)
if got := sys.TrailingZeros64(x); got != i {
t.Errorf("TrailingZeros64(%d)=%d, want %d", x, got, i)
}
}
}
func TestTrailingZeros32(t *testing.T) {
for i := 0; i <= 32; i++ {
x := uint32(5) << uint(i)
if got := sys.TrailingZeros32(x); got != i {
t.Errorf("TrailingZeros32(%d)=%d, want %d", x, got, i)
}
}
}
func TestBswap64(t *testing.T) {
x := uint64(0x1122334455667788)
y := sys.Bswap64(x)
if y != 0x8877665544332211 {
t.Errorf("Bswap(%x)=%x, want 0x8877665544332211", x, y)
}
}
func TestBswap32(t *testing.T) {
x := uint32(0x11223344)
y := sys.Bswap32(x)
if y != 0x44332211 {
t.Errorf("Bswap(%x)=%x, want 0x44332211", x, y)
}
}

View File

@@ -1,41 +0,0 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package sys
// NOTE: keep in sync with cmd/compile/internal/types.CalcSize
// to make the compiler recognize this as an intrinsic type.
type nih struct{}
// NotInHeap is a type that must never be allocated from the GC'd heap or on the stack,
// and is called not-in-heap.
//
// Other types can embed NotInHeap to make it not-in-heap. Specifically, pointers
// to these types must always fail the `runtime.inheap` check. The type may be used
// for global variables, or for objects in unmanaged memory (e.g., allocated with
// `sysAlloc`, `persistentalloc`, `fixalloc`, or from a manually-managed span).
//
// Specifically:
//
// 1. `new(T)`, `make([]T)`, `append([]T, ...)` and implicit heap
// allocation of T are disallowed. (Though implicit allocations are
// disallowed in the runtime anyway.)
//
// 2. A pointer to a regular type (other than `unsafe.Pointer`) cannot be
// converted to a pointer to a not-in-heap type, even if they have the
// same underlying type.
//
// 3. Any type containing a not-in-heap type is itself considered not-in-heap.
//
// - Structs and arrays are not-in-heap if their elements are not-in-heap.
// - Maps and channels containing not-in-heap types are disallowed.
//
// 4. Write barriers on pointers to not-in-heap types can be omitted.
//
// The last point is the real benefit of NotInHeap. The runtime uses
// it for low-level internal structures to avoid memory barriers in the
// scheduler and the memory allocator where they are illegal or simply
// inefficient. This mechanism is reasonably safe and does not compromise
// the readability of the runtime.
type NotInHeap struct{ _ nih }

View File

@@ -1,7 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// package sys contains system- and configuration- and architecture-specific
// constants used by the runtime.
package sys

264
src/runtime/ints.s Normal file
View File

@@ -0,0 +1,264 @@
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "textflag.h"
DATA ·staticuint64s+0x000(SB)/8, $0
DATA ·staticuint64s+0x008(SB)/8, $1
DATA ·staticuint64s+0x010(SB)/8, $2
DATA ·staticuint64s+0x018(SB)/8, $3
DATA ·staticuint64s+0x020(SB)/8, $4
DATA ·staticuint64s+0x028(SB)/8, $5
DATA ·staticuint64s+0x030(SB)/8, $6
DATA ·staticuint64s+0x038(SB)/8, $7
DATA ·staticuint64s+0x040(SB)/8, $8
DATA ·staticuint64s+0x048(SB)/8, $9
DATA ·staticuint64s+0x050(SB)/8, $10
DATA ·staticuint64s+0x058(SB)/8, $11
DATA ·staticuint64s+0x060(SB)/8, $12
DATA ·staticuint64s+0x068(SB)/8, $13
DATA ·staticuint64s+0x070(SB)/8, $14
DATA ·staticuint64s+0x078(SB)/8, $15
DATA ·staticuint64s+0x080(SB)/8, $16
DATA ·staticuint64s+0x088(SB)/8, $17
DATA ·staticuint64s+0x090(SB)/8, $18
DATA ·staticuint64s+0x098(SB)/8, $19
DATA ·staticuint64s+0x0a0(SB)/8, $20
DATA ·staticuint64s+0x0a8(SB)/8, $21
DATA ·staticuint64s+0x0b0(SB)/8, $22
DATA ·staticuint64s+0x0b8(SB)/8, $23
DATA ·staticuint64s+0x0c0(SB)/8, $24
DATA ·staticuint64s+0x0c8(SB)/8, $25
DATA ·staticuint64s+0x0d0(SB)/8, $26
DATA ·staticuint64s+0x0d8(SB)/8, $27
DATA ·staticuint64s+0x0e0(SB)/8, $28
DATA ·staticuint64s+0x0e8(SB)/8, $29
DATA ·staticuint64s+0x0f0(SB)/8, $30
DATA ·staticuint64s+0x0f8(SB)/8, $31
DATA ·staticuint64s+0x100(SB)/8, $32
DATA ·staticuint64s+0x108(SB)/8, $33
DATA ·staticuint64s+0x110(SB)/8, $34
DATA ·staticuint64s+0x118(SB)/8, $35
DATA ·staticuint64s+0x120(SB)/8, $36
DATA ·staticuint64s+0x128(SB)/8, $37
DATA ·staticuint64s+0x130(SB)/8, $38
DATA ·staticuint64s+0x138(SB)/8, $39
DATA ·staticuint64s+0x140(SB)/8, $40
DATA ·staticuint64s+0x148(SB)/8, $41
DATA ·staticuint64s+0x150(SB)/8, $42
DATA ·staticuint64s+0x158(SB)/8, $43
DATA ·staticuint64s+0x160(SB)/8, $44
DATA ·staticuint64s+0x168(SB)/8, $45
DATA ·staticuint64s+0x170(SB)/8, $46
DATA ·staticuint64s+0x178(SB)/8, $47
DATA ·staticuint64s+0x180(SB)/8, $48
DATA ·staticuint64s+0x188(SB)/8, $49
DATA ·staticuint64s+0x190(SB)/8, $50
DATA ·staticuint64s+0x198(SB)/8, $51
DATA ·staticuint64s+0x1a0(SB)/8, $52
DATA ·staticuint64s+0x1a8(SB)/8, $53
DATA ·staticuint64s+0x1b0(SB)/8, $54
DATA ·staticuint64s+0x1b8(SB)/8, $55
DATA ·staticuint64s+0x1c0(SB)/8, $56
DATA ·staticuint64s+0x1c8(SB)/8, $57
DATA ·staticuint64s+0x1d0(SB)/8, $58
DATA ·staticuint64s+0x1d8(SB)/8, $59
DATA ·staticuint64s+0x1e0(SB)/8, $60
DATA ·staticuint64s+0x1e8(SB)/8, $61
DATA ·staticuint64s+0x1f0(SB)/8, $62
DATA ·staticuint64s+0x1f8(SB)/8, $63
DATA ·staticuint64s+0x200(SB)/8, $64
DATA ·staticuint64s+0x208(SB)/8, $65
DATA ·staticuint64s+0x210(SB)/8, $66
DATA ·staticuint64s+0x218(SB)/8, $67
DATA ·staticuint64s+0x220(SB)/8, $68
DATA ·staticuint64s+0x228(SB)/8, $69
DATA ·staticuint64s+0x230(SB)/8, $70
DATA ·staticuint64s+0x238(SB)/8, $71
DATA ·staticuint64s+0x240(SB)/8, $72
DATA ·staticuint64s+0x248(SB)/8, $73
DATA ·staticuint64s+0x250(SB)/8, $74
DATA ·staticuint64s+0x258(SB)/8, $75
DATA ·staticuint64s+0x260(SB)/8, $76
DATA ·staticuint64s+0x268(SB)/8, $77
DATA ·staticuint64s+0x270(SB)/8, $78
DATA ·staticuint64s+0x278(SB)/8, $79
DATA ·staticuint64s+0x280(SB)/8, $80
DATA ·staticuint64s+0x288(SB)/8, $81
DATA ·staticuint64s+0x290(SB)/8, $82
DATA ·staticuint64s+0x298(SB)/8, $83
DATA ·staticuint64s+0x2a0(SB)/8, $84
DATA ·staticuint64s+0x2a8(SB)/8, $85
DATA ·staticuint64s+0x2b0(SB)/8, $86
DATA ·staticuint64s+0x2b8(SB)/8, $87
DATA ·staticuint64s+0x2c0(SB)/8, $88
DATA ·staticuint64s+0x2c8(SB)/8, $89
DATA ·staticuint64s+0x2d0(SB)/8, $90
DATA ·staticuint64s+0x2d8(SB)/8, $91
DATA ·staticuint64s+0x2e0(SB)/8, $92
DATA ·staticuint64s+0x2e8(SB)/8, $93
DATA ·staticuint64s+0x2f0(SB)/8, $94
DATA ·staticuint64s+0x2f8(SB)/8, $95
DATA ·staticuint64s+0x300(SB)/8, $96
DATA ·staticuint64s+0x308(SB)/8, $97
DATA ·staticuint64s+0x310(SB)/8, $98
DATA ·staticuint64s+0x318(SB)/8, $99
DATA ·staticuint64s+0x320(SB)/8, $100
DATA ·staticuint64s+0x328(SB)/8, $101
DATA ·staticuint64s+0x330(SB)/8, $102
DATA ·staticuint64s+0x338(SB)/8, $103
DATA ·staticuint64s+0x340(SB)/8, $104
DATA ·staticuint64s+0x348(SB)/8, $105
DATA ·staticuint64s+0x350(SB)/8, $106
DATA ·staticuint64s+0x358(SB)/8, $107
DATA ·staticuint64s+0x360(SB)/8, $108
DATA ·staticuint64s+0x368(SB)/8, $109
DATA ·staticuint64s+0x370(SB)/8, $110
DATA ·staticuint64s+0x378(SB)/8, $111
DATA ·staticuint64s+0x380(SB)/8, $112
DATA ·staticuint64s+0x388(SB)/8, $113
DATA ·staticuint64s+0x390(SB)/8, $114
DATA ·staticuint64s+0x398(SB)/8, $115
DATA ·staticuint64s+0x3a0(SB)/8, $116
DATA ·staticuint64s+0x3a8(SB)/8, $117
DATA ·staticuint64s+0x3b0(SB)/8, $118
DATA ·staticuint64s+0x3b8(SB)/8, $119
DATA ·staticuint64s+0x3c0(SB)/8, $120
DATA ·staticuint64s+0x3c8(SB)/8, $121
DATA ·staticuint64s+0x3d0(SB)/8, $122
DATA ·staticuint64s+0x3d8(SB)/8, $123
DATA ·staticuint64s+0x3e0(SB)/8, $124
DATA ·staticuint64s+0x3e8(SB)/8, $125
DATA ·staticuint64s+0x3f0(SB)/8, $126
DATA ·staticuint64s+0x3f8(SB)/8, $127
DATA ·staticuint64s+0x400(SB)/8, $128
DATA ·staticuint64s+0x408(SB)/8, $129
DATA ·staticuint64s+0x410(SB)/8, $130
DATA ·staticuint64s+0x418(SB)/8, $131
DATA ·staticuint64s+0x420(SB)/8, $132
DATA ·staticuint64s+0x428(SB)/8, $133
DATA ·staticuint64s+0x430(SB)/8, $134
DATA ·staticuint64s+0x438(SB)/8, $135
DATA ·staticuint64s+0x440(SB)/8, $136
DATA ·staticuint64s+0x448(SB)/8, $137
DATA ·staticuint64s+0x450(SB)/8, $138
DATA ·staticuint64s+0x458(SB)/8, $139
DATA ·staticuint64s+0x460(SB)/8, $140
DATA ·staticuint64s+0x468(SB)/8, $141
DATA ·staticuint64s+0x470(SB)/8, $142
DATA ·staticuint64s+0x478(SB)/8, $143
DATA ·staticuint64s+0x480(SB)/8, $144
DATA ·staticuint64s+0x488(SB)/8, $145
DATA ·staticuint64s+0x490(SB)/8, $146
DATA ·staticuint64s+0x498(SB)/8, $147
DATA ·staticuint64s+0x4a0(SB)/8, $148
DATA ·staticuint64s+0x4a8(SB)/8, $149
DATA ·staticuint64s+0x4b0(SB)/8, $150
DATA ·staticuint64s+0x4b8(SB)/8, $151
DATA ·staticuint64s+0x4c0(SB)/8, $152
DATA ·staticuint64s+0x4c8(SB)/8, $153
DATA ·staticuint64s+0x4d0(SB)/8, $154
DATA ·staticuint64s+0x4d8(SB)/8, $155
DATA ·staticuint64s+0x4e0(SB)/8, $156
DATA ·staticuint64s+0x4e8(SB)/8, $157
DATA ·staticuint64s+0x4f0(SB)/8, $158
DATA ·staticuint64s+0x4f8(SB)/8, $159
DATA ·staticuint64s+0x500(SB)/8, $160
DATA ·staticuint64s+0x508(SB)/8, $161
DATA ·staticuint64s+0x510(SB)/8, $162
DATA ·staticuint64s+0x518(SB)/8, $163
DATA ·staticuint64s+0x520(SB)/8, $164
DATA ·staticuint64s+0x528(SB)/8, $165
DATA ·staticuint64s+0x530(SB)/8, $166
DATA ·staticuint64s+0x538(SB)/8, $167
DATA ·staticuint64s+0x540(SB)/8, $168
DATA ·staticuint64s+0x548(SB)/8, $169
DATA ·staticuint64s+0x550(SB)/8, $170
DATA ·staticuint64s+0x558(SB)/8, $171
DATA ·staticuint64s+0x560(SB)/8, $172
DATA ·staticuint64s+0x568(SB)/8, $173
DATA ·staticuint64s+0x570(SB)/8, $174
DATA ·staticuint64s+0x578(SB)/8, $175
DATA ·staticuint64s+0x580(SB)/8, $176
DATA ·staticuint64s+0x588(SB)/8, $177
DATA ·staticuint64s+0x590(SB)/8, $178
DATA ·staticuint64s+0x598(SB)/8, $179
DATA ·staticuint64s+0x5a0(SB)/8, $180
DATA ·staticuint64s+0x5a8(SB)/8, $181
DATA ·staticuint64s+0x5b0(SB)/8, $182
DATA ·staticuint64s+0x5b8(SB)/8, $183
DATA ·staticuint64s+0x5c0(SB)/8, $184
DATA ·staticuint64s+0x5c8(SB)/8, $185
DATA ·staticuint64s+0x5d0(SB)/8, $186
DATA ·staticuint64s+0x5d8(SB)/8, $187
DATA ·staticuint64s+0x5e0(SB)/8, $188
DATA ·staticuint64s+0x5e8(SB)/8, $189
DATA ·staticuint64s+0x5f0(SB)/8, $190
DATA ·staticuint64s+0x5f8(SB)/8, $191
DATA ·staticuint64s+0x600(SB)/8, $192
DATA ·staticuint64s+0x608(SB)/8, $193
DATA ·staticuint64s+0x610(SB)/8, $194
DATA ·staticuint64s+0x618(SB)/8, $195
DATA ·staticuint64s+0x620(SB)/8, $196
DATA ·staticuint64s+0x628(SB)/8, $197
DATA ·staticuint64s+0x630(SB)/8, $198
DATA ·staticuint64s+0x638(SB)/8, $199
DATA ·staticuint64s+0x640(SB)/8, $200
DATA ·staticuint64s+0x648(SB)/8, $201
DATA ·staticuint64s+0x650(SB)/8, $202
DATA ·staticuint64s+0x658(SB)/8, $203
DATA ·staticuint64s+0x660(SB)/8, $204
DATA ·staticuint64s+0x668(SB)/8, $205
DATA ·staticuint64s+0x670(SB)/8, $206
DATA ·staticuint64s+0x678(SB)/8, $207
DATA ·staticuint64s+0x680(SB)/8, $208
DATA ·staticuint64s+0x688(SB)/8, $209
DATA ·staticuint64s+0x690(SB)/8, $210
DATA ·staticuint64s+0x698(SB)/8, $211
DATA ·staticuint64s+0x6a0(SB)/8, $212
DATA ·staticuint64s+0x6a8(SB)/8, $213
DATA ·staticuint64s+0x6b0(SB)/8, $214
DATA ·staticuint64s+0x6b8(SB)/8, $215
DATA ·staticuint64s+0x6c0(SB)/8, $216
DATA ·staticuint64s+0x6c8(SB)/8, $217
DATA ·staticuint64s+0x6d0(SB)/8, $218
DATA ·staticuint64s+0x6d8(SB)/8, $219
DATA ·staticuint64s+0x6e0(SB)/8, $220
DATA ·staticuint64s+0x6e8(SB)/8, $221
DATA ·staticuint64s+0x6f0(SB)/8, $222
DATA ·staticuint64s+0x6f8(SB)/8, $223
DATA ·staticuint64s+0x700(SB)/8, $224
DATA ·staticuint64s+0x708(SB)/8, $225
DATA ·staticuint64s+0x710(SB)/8, $226
DATA ·staticuint64s+0x718(SB)/8, $227
DATA ·staticuint64s+0x720(SB)/8, $228
DATA ·staticuint64s+0x728(SB)/8, $229
DATA ·staticuint64s+0x730(SB)/8, $230
DATA ·staticuint64s+0x738(SB)/8, $231
DATA ·staticuint64s+0x740(SB)/8, $232
DATA ·staticuint64s+0x748(SB)/8, $233
DATA ·staticuint64s+0x750(SB)/8, $234
DATA ·staticuint64s+0x758(SB)/8, $235
DATA ·staticuint64s+0x760(SB)/8, $236
DATA ·staticuint64s+0x768(SB)/8, $237
DATA ·staticuint64s+0x770(SB)/8, $238
DATA ·staticuint64s+0x778(SB)/8, $239
DATA ·staticuint64s+0x780(SB)/8, $240
DATA ·staticuint64s+0x788(SB)/8, $241
DATA ·staticuint64s+0x790(SB)/8, $242
DATA ·staticuint64s+0x798(SB)/8, $243
DATA ·staticuint64s+0x7a0(SB)/8, $244
DATA ·staticuint64s+0x7a8(SB)/8, $245
DATA ·staticuint64s+0x7b0(SB)/8, $246
DATA ·staticuint64s+0x7b8(SB)/8, $247
DATA ·staticuint64s+0x7c0(SB)/8, $248
DATA ·staticuint64s+0x7c8(SB)/8, $249
DATA ·staticuint64s+0x7d0(SB)/8, $250
DATA ·staticuint64s+0x7d8(SB)/8, $251
DATA ·staticuint64s+0x7e0(SB)/8, $252
DATA ·staticuint64s+0x7e8(SB)/8, $253
DATA ·staticuint64s+0x7f0(SB)/8, $254
DATA ·staticuint64s+0x7f8(SB)/8, $255
GLOBL ·staticuint64s(SB), RODATA, $0x800

View File

@@ -13,6 +13,7 @@ import _ "unsafe"
//go:linkname _cgo_panic_internal
//go:linkname cgoAlwaysFalse
//go:linkname cgoUse
//go:linkname cgoKeepAlive
//go:linkname cgoCheckPointer
//go:linkname cgoCheckResult
//go:linkname cgoNoCallback

View File

@@ -0,0 +1,211 @@
// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.swissmap
package runtime
import (
"internal/abi"
"internal/runtime/maps"
"internal/runtime/sys"
"unsafe"
)
// Legacy //go:linkname compatibility shims
//
// The functions below are unused by the toolchain, and exist only for
// compatibility with existing //go:linkname use in the ecosystem (and in
// map_noswiss.go for normal use via GOEXPERIMENT=noswissmap).
// linknameIter is the it argument to mapiterinit and mapiternext.
//
// Callers of mapiterinit allocate their own iter structure, which has the
// layout of the pre-Go 1.24 hiter structure, shown here for posterity:
//
// type hiter struct {
// key unsafe.Pointer
// elem unsafe.Pointer
// t *maptype
// h *hmap
// buckets unsafe.Pointer
// bptr *bmap
// overflow *[]*bmap
// oldoverflow *[]*bmap
// startBucket uintptr
// offset uint8
// wrapped bool
// B uint8
// i uint8
// bucket uintptr
// checkBucket uintptr
// }
//
// Our structure must maintain compatibility with the old structure. This
// means:
//
// - Our structure must be the same size or smaller than hiter. Otherwise we
// may write outside the caller's hiter allocation.
// - Our structure must have the same pointer layout as hiter, so that the GC
// tracks pointers properly.
//
// Based on analysis of the "hall of shame" users of these linknames:
//
// - The key and elem fields must be kept up to date with the current key/elem.
// Some users directly access the key and elem fields rather than calling
// reflect.mapiterkey/reflect.mapiterelem.
// - The t field must be non-nil after mapiterinit. gonum.org/v1/gonum uses
// this to verify the iterator is initialized.
// - github.com/segmentio/encoding and github.com/RomiChan/protobuf check if h
// is non-nil, but the code has no effect. Thus the value of h does not
// matter. See internal/runtime_reflect/map.go.
type linknameIter struct {
// Fields from hiter.
key unsafe.Pointer
elem unsafe.Pointer
typ *abi.SwissMapType
// The real iterator.
it *maps.Iter
}
// mapiterinit is a compatibility wrapper for map iterator for users of
// //go:linkname from before Go 1.24. It is not used by Go itself. New users
// should use reflect or the maps package.
//
// mapiterinit should be an internal detail,
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
// - github.com/bytedance/sonic
// - github.com/goccy/go-json
// - github.com/RomiChan/protobuf
// - github.com/segmentio/encoding
// - github.com/ugorji/go/codec
// - github.com/wI2L/jettison
//
// Do not remove or change the type signature.
// See go.dev/issue/67401.
//
//go:linkname mapiterinit
func mapiterinit(t *abi.SwissMapType, m *maps.Map, it *linknameIter) {
if raceenabled && m != nil {
callerpc := sys.GetCallerPC()
racereadpc(unsafe.Pointer(m), callerpc, abi.FuncPCABIInternal(mapiterinit))
}
it.typ = t
it.it = new(maps.Iter)
it.it.Init(t, m)
it.it.Next()
it.key = it.it.Key()
it.elem = it.it.Elem()
}
// reflect_mapiterinit is a compatibility wrapper for map iterator for users of
// //go:linkname from before Go 1.24. It is not used by Go itself. New users
// should use reflect or the maps package.
//
// reflect_mapiterinit should be an internal detail,
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
// - github.com/modern-go/reflect2
// - gitee.com/quant1x/gox
// - github.com/v2pro/plz
// - github.com/wI2L/jettison
//
// Do not remove or change the type signature.
// See go.dev/issue/67401.
//
//go:linkname reflect_mapiterinit reflect.mapiterinit
func reflect_mapiterinit(t *abi.SwissMapType, m *maps.Map, it *linknameIter) {
mapiterinit(t, m, it)
}
// mapiternext is a compatibility wrapper for map iterator for users of
// //go:linkname from before Go 1.24. It is not used by Go itself. New users
// should use reflect or the maps package.
//
// mapiternext should be an internal detail,
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
// - github.com/bytedance/sonic
// - github.com/RomiChan/protobuf
// - github.com/segmentio/encoding
// - github.com/ugorji/go/codec
// - gonum.org/v1/gonum
//
// Do not remove or change the type signature.
// See go.dev/issue/67401.
//
//go:linkname mapiternext
func mapiternext(it *linknameIter) {
if raceenabled {
callerpc := sys.GetCallerPC()
racereadpc(unsafe.Pointer(it.it.Map()), callerpc, abi.FuncPCABIInternal(mapiternext))
}
it.it.Next()
it.key = it.it.Key()
it.elem = it.it.Elem()
}
// reflect_mapiternext is a compatibility wrapper for map iterator for users of
// //go:linkname from before Go 1.24. It is not used by Go itself. New users
// should use reflect or the maps package.
//
// reflect_mapiternext is for package reflect,
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
// - gitee.com/quant1x/gox
// - github.com/modern-go/reflect2
// - github.com/goccy/go-json
// - github.com/v2pro/plz
// - github.com/wI2L/jettison
//
// Do not remove or change the type signature.
// See go.dev/issue/67401.
//
//go:linkname reflect_mapiternext reflect.mapiternext
func reflect_mapiternext(it *linknameIter) {
mapiternext(it)
}
// reflect_mapiterkey is a compatibility wrapper for map iterator for users of
// //go:linkname from before Go 1.24. It is not used by Go itself. New users
// should use reflect or the maps package.
//
// reflect_mapiterkey should be an internal detail,
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
// - github.com/goccy/go-json
// - gonum.org/v1/gonum
//
// Do not remove or change the type signature.
// See go.dev/issue/67401.
//
//go:linkname reflect_mapiterkey reflect.mapiterkey
func reflect_mapiterkey(it *linknameIter) unsafe.Pointer {
return it.it.Key()
}
// reflect_mapiterelem is a compatibility wrapper for map iterator for users of
// //go:linkname from before Go 1.24. It is not used by Go itself. New users
// should use reflect or the maps package.
//
// reflect_mapiterelem should be an internal detail,
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
// - github.com/goccy/go-json
// - gonum.org/v1/gonum
//
// Do not remove or change the type signature.
// See go.dev/issue/67401.
//
//go:linkname reflect_mapiterelem reflect.mapiterelem
func reflect_mapiterelem(it *linknameIter) unsafe.Pointer {
return it.it.Elem()
}
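// The sketch below is illustrative only (it is not part of the runtime): it
// shows the pre-Go 1.24 caller pattern that the shims above keep working. Such
// code lives in an external package; pulling runtime symbols by linkname also
// requires a blank import of unsafe and, in practice, an empty .s file in that
// package. legacyHiter and iterate are hypothetical names, and the struct
// layout simply copies the old hiter documented above with the unexported
// runtime types replaced by unsafe.Pointer.
//
//	//go:linkname mapiterinit runtime.mapiterinit
//	func mapiterinit(t, m, it unsafe.Pointer)
//
//	//go:linkname mapiternext runtime.mapiternext
//	func mapiternext(it unsafe.Pointer)
//
//	// legacyHiter mirrors the pre-Go 1.24 hiter layout: large enough for
//	// linknameIter, with pointers in the slots the GC must scan.
//	type legacyHiter struct {
//		key, elem             unsafe.Pointer
//		t, h, buckets, bptr   unsafe.Pointer
//		overflow, oldoverflow unsafe.Pointer
//		startBucket           uintptr
//		offset                uint8
//		wrapped               bool
//		B, i                  uint8
//		bucket, checkBucket   uintptr
//	}
//
//	// iterate visits every entry, given the *maptype and map header pulled
//	// out of the map's interface representation (not shown here).
//	func iterate(t, h unsafe.Pointer, visit func(k, e unsafe.Pointer)) {
//		var it legacyHiter
//		mapiterinit(t, h, unsafe.Pointer(&it))
//		for it.key != nil {
//			visit(it.key, it.elem)
//			mapiternext(unsafe.Pointer(&it))
//		}
//	}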

View File

@@ -11,32 +11,6 @@ import (
"unsafe"
)
// This implementation depends on OS-specific implementations of
//
// futexsleep(addr *uint32, val uint32, ns int64)
// Atomically,
// if *addr == val { sleep }
// Might be woken up spuriously; that's allowed.
// Don't sleep longer than ns; ns < 0 means forever.
//
// futexwakeup(addr *uint32, cnt uint32)
// If any procs are sleeping on addr, wake up at most cnt.
const (
mutex_unlocked = 0
mutex_locked = 1
mutex_sleeping = 2
active_spin = 4
active_spin_cnt = 30
passive_spin = 1
)
// Possible lock states are mutex_unlocked, mutex_locked and mutex_sleeping.
// mutex_sleeping means that there is presumably at least one sleeping thread.
// Note that there can be spinning threads during all states - they do not
// affect mutex's state.
// We use the uintptr mutex.key and note.key as a uint32.
//
//go:nosplit
@@ -44,103 +18,6 @@ func key32(p *uintptr) *uint32 {
return (*uint32)(unsafe.Pointer(p))
}
func mutexContended(l *mutex) bool {
return atomic.Load(key32(&l.key)) > mutex_locked
}
func lock(l *mutex) {
lockWithRank(l, getLockRank(l))
}
func lock2(l *mutex) {
gp := getg()
if gp.m.locks < 0 {
throw("runtime·lock: lock count")
}
gp.m.locks++
// Speculative grab for lock.
v := atomic.Xchg(key32(&l.key), mutex_locked)
if v == mutex_unlocked {
return
}
// wait is either MUTEX_LOCKED or MUTEX_SLEEPING
// depending on whether there is a thread sleeping
// on this mutex. If we ever change l->key from
// MUTEX_SLEEPING to some other value, we must be
// careful to change it back to MUTEX_SLEEPING before
// returning, to ensure that the sleeping thread gets
// its wakeup call.
wait := v
timer := &lockTimer{lock: l}
timer.begin()
// On uniprocessors, no point spinning.
// On multiprocessors, spin for ACTIVE_SPIN attempts.
spin := 0
if ncpu > 1 {
spin = active_spin
}
for {
// Try for lock, spinning.
for i := 0; i < spin; i++ {
for l.key == mutex_unlocked {
if atomic.Cas(key32(&l.key), mutex_unlocked, wait) {
timer.end()
return
}
}
procyield(active_spin_cnt)
}
// Try for lock, rescheduling.
for i := 0; i < passive_spin; i++ {
for l.key == mutex_unlocked {
if atomic.Cas(key32(&l.key), mutex_unlocked, wait) {
timer.end()
return
}
}
osyield()
}
// Sleep.
v = atomic.Xchg(key32(&l.key), mutex_sleeping)
if v == mutex_unlocked {
timer.end()
return
}
wait = mutex_sleeping
futexsleep(key32(&l.key), mutex_sleeping, -1)
}
}
func unlock(l *mutex) {
unlockWithRank(l)
}
func unlock2(l *mutex) {
v := atomic.Xchg(key32(&l.key), mutex_unlocked)
if v == mutex_unlocked {
throw("unlock of unlocked lock")
}
if v == mutex_sleeping {
futexwakeup(key32(&l.key), 1)
}
gp := getg()
gp.m.mLockProfile.recordUnlock(l)
gp.m.locks--
if gp.m.locks < 0 {
throw("runtime·unlock: lock count")
}
if gp.m.locks == 0 && gp.preempt { // restore the preemption request in case we've cleared it in newstack
gp.stackguard0 = stackPreempt
}
}
// One-time notifications.
func noteclear(n *note) {
n.key = 0
@@ -254,3 +131,33 @@ func beforeIdle(int64, int64) (*g, bool) {
}
func checkTimeouts() {}
//go:nosplit
func semacreate(mp *m) {}
//go:nosplit
func semasleep(ns int64) int32 {
mp := getg().m
for v := atomic.Xadd(&mp.waitsema, -1); ; v = atomic.Load(&mp.waitsema) {
if int32(v) >= 0 {
return 0
}
futexsleep(&mp.waitsema, v, ns)
if ns >= 0 {
if int32(v) >= 0 {
return 0
} else {
return -1
}
}
}
}
//go:nosplit
func semawakeup(mp *m) {
v := atomic.Xadd(&mp.waitsema, 1)
if v == 0 {
futexwakeup(&mp.waitsema, 1)
}
}
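// An illustrative sketch (hypothetical helpers, not part of the runtime) of
// the futexsleep/futexwakeup contract described above: a one-shot event whose
// waiters sleep only while the word is still zero, re-checking in a loop
// because futexsleep may wake spuriously.
func eventWait(addr *uint32) {
	for atomic.Load(addr) == 0 {
		futexsleep(addr, 0, -1) // sleeps only if *addr is still 0; no timeout
	}
}
func eventSet(addr *uint32) {
	atomic.Store(addr, 1)
	futexwakeup(addr, ^uint32(0)) // wake every waiter
}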

View File

@@ -0,0 +1,138 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build (dragonfly || freebsd || linux) && !goexperiment.spinbitmutex
package runtime
import (
"internal/runtime/atomic"
)
// This implementation depends on OS-specific implementations of
//
// futexsleep(addr *uint32, val uint32, ns int64)
// Atomically,
// if *addr == val { sleep }
// Might be woken up spuriously; that's allowed.
// Don't sleep longer than ns; ns < 0 means forever.
//
// futexwakeup(addr *uint32, cnt uint32)
// If any procs are sleeping on addr, wake up at most cnt.
const (
mutex_unlocked = 0
mutex_locked = 1
mutex_sleeping = 2
active_spin = 4
active_spin_cnt = 30
passive_spin = 1
)
// Possible lock states are mutex_unlocked, mutex_locked and mutex_sleeping.
// mutex_sleeping means that there is presumably at least one sleeping thread.
// Note that there can be spinning threads during all states - they do not
// affect mutex's state.
type mWaitList struct{}
func lockVerifyMSize() {}
func mutexContended(l *mutex) bool {
return atomic.Load(key32(&l.key)) > mutex_locked
}
func lock(l *mutex) {
lockWithRank(l, getLockRank(l))
}
func lock2(l *mutex) {
gp := getg()
if gp.m.locks < 0 {
throw("runtime·lock: lock count")
}
gp.m.locks++
// Speculative grab for lock.
v := atomic.Xchg(key32(&l.key), mutex_locked)
if v == mutex_unlocked {
return
}
// wait is either MUTEX_LOCKED or MUTEX_SLEEPING
// depending on whether there is a thread sleeping
// on this mutex. If we ever change l->key from
// MUTEX_SLEEPING to some other value, we must be
// careful to change it back to MUTEX_SLEEPING before
// returning, to ensure that the sleeping thread gets
// its wakeup call.
wait := v
timer := &lockTimer{lock: l}
timer.begin()
// On uniprocessors, no point spinning.
// On multiprocessors, spin for ACTIVE_SPIN attempts.
spin := 0
if ncpu > 1 {
spin = active_spin
}
for {
// Try for lock, spinning.
for i := 0; i < spin; i++ {
for l.key == mutex_unlocked {
if atomic.Cas(key32(&l.key), mutex_unlocked, wait) {
timer.end()
return
}
}
procyield(active_spin_cnt)
}
// Try for lock, rescheduling.
for i := 0; i < passive_spin; i++ {
for l.key == mutex_unlocked {
if atomic.Cas(key32(&l.key), mutex_unlocked, wait) {
timer.end()
return
}
}
osyield()
}
// Sleep.
v = atomic.Xchg(key32(&l.key), mutex_sleeping)
if v == mutex_unlocked {
timer.end()
return
}
wait = mutex_sleeping
futexsleep(key32(&l.key), mutex_sleeping, -1)
}
}
func unlock(l *mutex) {
unlockWithRank(l)
}
func unlock2(l *mutex) {
v := atomic.Xchg(key32(&l.key), mutex_unlocked)
if v == mutex_unlocked {
throw("unlock of unlocked lock")
}
if v == mutex_sleeping {
futexwakeup(key32(&l.key), 1)
}
gp := getg()
gp.m.mLockProfile.recordUnlock(l)
gp.m.locks--
if gp.m.locks < 0 {
throw("runtime·unlock: lock count")
}
if gp.m.locks == 0 && gp.preempt { // restore the preemption request in case we've cleared it in newstack
gp.stackguard0 = stackPreempt
}
}

View File

@@ -6,7 +6,10 @@
package runtime
import _ "unsafe" // for go:linkname
import (
"internal/runtime/sys"
_ "unsafe" // for go:linkname
)
// js/wasm has no support for threads yet. There is no preemption.
@@ -23,6 +26,10 @@ const (
passive_spin = 1
)
type mWaitList struct{}
func lockVerifyMSize() {}
func mutexContended(l *mutex) bool {
return false
}
@@ -63,29 +70,21 @@ func unlock2(l *mutex) {
// One-time notifications.
type noteWithTimeout struct {
gp *g
deadline int64
}
var (
notes = make(map[*note]*g)
notesWithTimeout = make(map[*note]noteWithTimeout)
)
// Linked list of notes with a deadline.
var allDeadlineNotes *note
func noteclear(n *note) {
n.key = note_cleared
n.status = note_cleared
}
func notewakeup(n *note) {
// gp := getg()
if n.key == note_woken {
if n.status == note_woken {
throw("notewakeup - double wakeup")
}
cleared := n.key == note_cleared
n.key = note_woken
cleared := n.status == note_cleared
n.status = note_woken
if cleared {
goready(notes[n], 1)
goready(n.gp, 1)
}
}
@@ -113,48 +112,50 @@ func notetsleepg(n *note, ns int64) bool {
}
id := scheduleTimeoutEvent(delay)
mp := acquirem()
notes[n] = gp
notesWithTimeout[n] = noteWithTimeout{gp: gp, deadline: deadline}
releasem(mp)
n.gp = gp
n.deadline = deadline
if allDeadlineNotes != nil {
allDeadlineNotes.allprev = n
}
n.allnext = allDeadlineNotes
allDeadlineNotes = n
gopark(nil, nil, waitReasonSleep, traceBlockSleep, 1)
clearTimeoutEvent(id) // note might have woken early, clear timeout
mp = acquirem()
delete(notes, n)
delete(notesWithTimeout, n)
releasem(mp)
n.gp = nil
n.deadline = 0
if n.allprev != nil {
n.allprev.allnext = n.allnext
}
if allDeadlineNotes == n {
allDeadlineNotes = n.allnext
}
n.allprev = nil
n.allnext = nil
return n.key == note_woken
return n.status == note_woken
}
for n.key != note_woken {
mp := acquirem()
notes[n] = gp
releasem(mp)
for n.status != note_woken {
n.gp = gp
gopark(nil, nil, waitReasonZero, traceBlockGeneric, 1)
mp = acquirem()
delete(notes, n)
releasem(mp)
n.gp = nil
}
return true
}
// checkTimeouts resumes goroutines that are waiting on a note which has reached its deadline.
// TODO(drchase): need to understand if write barriers are really okay in this context.
//
//go:yeswritebarrierrec
func checkTimeouts() {
now := nanotime()
// TODO: map iteration has the write barriers in it; is that okay?
for n, nt := range notesWithTimeout {
if n.key == note_cleared && now >= nt.deadline {
n.key = note_timeout
goready(nt.gp, 1)
for n := allDeadlineNotes; n != nil; n = n.allnext {
if n.status == note_cleared && n.deadline != 0 && now >= n.deadline {
n.status = note_timeout
goready(n.gp, 1)
}
}
}
@@ -250,7 +251,7 @@ var idleStart int64
func handleAsyncEvent() {
idleStart = nanotime()
pause(getcallersp() - 16)
pause(sys.GetCallerSP() - 16)
}
// clearIdleTimeout clears our record of the timeout started by beforeIdle.
@@ -259,9 +260,6 @@ func clearIdleTimeout() {
idleTimeout = nil
}
// pause sets SP to newsp and pauses the execution of Go's WebAssembly code until an event is triggered.
func pause(newsp uintptr)
// scheduleTimeoutEvent tells the WebAssembly environment to trigger an event after ms milliseconds.
// It returns a timer id that can be used with clearTimeoutEvent.
//
@@ -300,7 +298,7 @@ func handleEvent() {
// return execution to JavaScript
idleStart = nanotime()
pause(getcallersp() - 16)
pause(sys.GetCallerSP() - 16)
}
// eventHandler retrieves and executes handlers for pending JavaScript events.

View File

@@ -11,131 +11,10 @@ import (
"unsafe"
)
// This implementation depends on OS-specific implementations of
//
// func semacreate(mp *m)
// Create a semaphore for mp, if it does not already have one.
//
// func semasleep(ns int64) int32
// If ns < 0, acquire m's semaphore and return 0.
// If ns >= 0, try to acquire m's semaphore for at most ns nanoseconds.
// Return 0 if the semaphore was acquired, -1 if interrupted or timed out.
//
// func semawakeup(mp *m)
// Wake up mp, which is or will soon be sleeping on its semaphore.
const (
locked uintptr = 1
active_spin = 4
active_spin_cnt = 30
passive_spin = 1
)
func mutexContended(l *mutex) bool {
return atomic.Loaduintptr(&l.key) > locked
}
func lock(l *mutex) {
lockWithRank(l, getLockRank(l))
}
func lock2(l *mutex) {
gp := getg()
if gp.m.locks < 0 {
throw("runtime·lock: lock count")
}
gp.m.locks++
// Speculative grab for lock.
if atomic.Casuintptr(&l.key, 0, locked) {
return
}
semacreate(gp.m)
timer := &lockTimer{lock: l}
timer.begin()
// On uniprocessors, no point spinning.
// On multiprocessors, spin for ACTIVE_SPIN attempts.
spin := 0
if ncpu > 1 {
spin = active_spin
}
Loop:
for i := 0; ; i++ {
v := atomic.Loaduintptr(&l.key)
if v&locked == 0 {
// Unlocked. Try to lock.
if atomic.Casuintptr(&l.key, v, v|locked) {
timer.end()
return
}
i = 0
}
if i < spin {
procyield(active_spin_cnt)
} else if i < spin+passive_spin {
osyield()
} else {
// Someone else has it.
// l->waitm points to a linked list of M's waiting
// for this lock, chained through m->nextwaitm.
// Queue this M.
for {
gp.m.nextwaitm = muintptr(v &^ locked)
if atomic.Casuintptr(&l.key, v, uintptr(unsafe.Pointer(gp.m))|locked) {
break
}
v = atomic.Loaduintptr(&l.key)
if v&locked == 0 {
continue Loop
}
}
if v&locked != 0 {
// Queued. Wait.
semasleep(-1)
i = 0
}
}
}
}
func unlock(l *mutex) {
unlockWithRank(l)
}
// We might not be holding a p in this code.
//
//go:nowritebarrier
func unlock2(l *mutex) {
gp := getg()
var mp *m
for {
v := atomic.Loaduintptr(&l.key)
if v == locked {
if atomic.Casuintptr(&l.key, locked, 0) {
break
}
} else {
// Other M's are waiting for the lock.
// Dequeue an M.
mp = muintptr(v &^ locked).ptr()
if atomic.Casuintptr(&l.key, v, uintptr(mp.nextwaitm)) {
// Dequeued an M. Wake it.
semawakeup(mp)
break
}
}
}
gp.m.mLockProfile.recordUnlock(l)
gp.m.locks--
if gp.m.locks < 0 {
throw("runtime·unlock: lock count")
}
if gp.m.locks == 0 && gp.preempt { // restore the preemption request in case we've cleared it in newstack
gp.stackguard0 = stackPreempt
}
}
// One-time notifications.
func noteclear(n *note) {
n.key = 0

View File

@@ -0,0 +1,148 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build (aix || darwin || netbsd || openbsd || plan9 || solaris || windows) && !goexperiment.spinbitmutex
package runtime
import (
"internal/runtime/atomic"
"unsafe"
)
// This implementation depends on OS-specific implementations of
//
// func semacreate(mp *m)
// Create a semaphore for mp, if it does not already have one.
//
// func semasleep(ns int64) int32
// If ns < 0, acquire m's semaphore and return 0.
// If ns >= 0, try to acquire m's semaphore for at most ns nanoseconds.
// Return 0 if the semaphore was acquired, -1 if interrupted or timed out.
//
// func semawakeup(mp *m)
// Wake up mp, which is or will soon be sleeping on its semaphore.
const (
active_spin = 4
active_spin_cnt = 30
passive_spin = 1
)
// mWaitList is part of the M struct, and holds the list of Ms that are waiting
// for a particular runtime.mutex.
//
// When an M is unable to immediately obtain a lock, it adds itself to the list
// of Ms waiting for the lock. It does that via this struct's next field,
// forming a singly-linked list with the mutex's key field pointing to the head
// of the list.
type mWaitList struct {
next muintptr // next m waiting for lock
}
func lockVerifyMSize() {}
func mutexContended(l *mutex) bool {
return atomic.Loaduintptr(&l.key) > locked
}
func lock(l *mutex) {
lockWithRank(l, getLockRank(l))
}
func lock2(l *mutex) {
gp := getg()
if gp.m.locks < 0 {
throw("runtime·lock: lock count")
}
gp.m.locks++
// Speculative grab for lock.
if atomic.Casuintptr(&l.key, 0, locked) {
return
}
semacreate(gp.m)
timer := &lockTimer{lock: l}
timer.begin()
// On uniprocessors, no point spinning.
// On multiprocessors, spin for ACTIVE_SPIN attempts.
spin := 0
if ncpu > 1 {
spin = active_spin
}
Loop:
for i := 0; ; i++ {
v := atomic.Loaduintptr(&l.key)
if v&locked == 0 {
// Unlocked. Try to lock.
if atomic.Casuintptr(&l.key, v, v|locked) {
timer.end()
return
}
i = 0
}
if i < spin {
procyield(active_spin_cnt)
} else if i < spin+passive_spin {
osyield()
} else {
// Someone else has it.
// l.key points to a linked list of M's waiting
// for this lock, chained through m.mWaitList.next.
// Queue this M.
for {
gp.m.mWaitList.next = muintptr(v &^ locked)
if atomic.Casuintptr(&l.key, v, uintptr(unsafe.Pointer(gp.m))|locked) {
break
}
v = atomic.Loaduintptr(&l.key)
if v&locked == 0 {
continue Loop
}
}
if v&locked != 0 {
// Queued. Wait.
semasleep(-1)
i = 0
}
}
}
}
func unlock(l *mutex) {
unlockWithRank(l)
}
// We might not be holding a p in this code.
//
//go:nowritebarrier
func unlock2(l *mutex) {
gp := getg()
var mp *m
for {
v := atomic.Loaduintptr(&l.key)
if v == locked {
if atomic.Casuintptr(&l.key, locked, 0) {
break
}
} else {
// Other M's are waiting for the lock.
// Dequeue an M.
mp = muintptr(v &^ locked).ptr()
if atomic.Casuintptr(&l.key, v, uintptr(mp.mWaitList.next)) {
// Dequeued an M. Wake it.
semawakeup(mp) // no use of mp after this point; it's awake
break
}
}
}
gp.m.mLockProfile.recordUnlock(l)
gp.m.locks--
if gp.m.locks < 0 {
throw("runtime·unlock: lock count")
}
if gp.m.locks == 0 && gp.preempt { // restore the preemption request in case we've cleared it in newstack
gp.stackguard0 = stackPreempt
}
}

src/runtime/lock_spinbit.go Normal file
View File

@@ -0,0 +1,372 @@
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build (aix || darwin || dragonfly || freebsd || linux || netbsd || openbsd || plan9 || solaris || windows) && goexperiment.spinbitmutex
package runtime
import (
"internal/goarch"
"internal/runtime/atomic"
"unsafe"
)
// This implementation depends on OS-specific implementations of
//
// func semacreate(mp *m)
// Create a semaphore for mp, if it does not already have one.
//
// func semasleep(ns int64) int32
// If ns < 0, acquire m's semaphore and return 0.
// If ns >= 0, try to acquire m's semaphore for at most ns nanoseconds.
// Return 0 if the semaphore was acquired, -1 if interrupted or timed out.
//
// func semawakeup(mp *m)
// Wake up mp, which is or will soon be sleeping on its semaphore.
// The mutex state consists of four flags and a pointer. The flag at bit 0,
// mutexLocked, represents the lock itself. Bit 1, mutexSleeping, is a hint that
// the pointer is non-nil. The fast paths for locking and unlocking the mutex
// are based on atomic 8-bit swap operations on the low byte; bits 2 through 7
// are unused.
//
// Bit 8, mutexSpinning, is a try-lock that grants a waiting M permission to
// spin on the state word. Most other Ms must attempt to spend their time
// sleeping to reduce traffic on the cache line. This is the "spin bit" for
// which the implementation is named. (The anti-starvation mechanism also grants
// temporary permission for an M to spin.)
//
// Bit 9, mutexStackLocked, is a try-lock that grants an unlocking M permission
// to inspect the list of waiting Ms and to pop an M off of that stack.
//
// The upper bits hold a (partial) pointer to the M that most recently went to
// sleep. The sleeping Ms form a stack linked by their mWaitList.next fields.
// Because the fast paths use an 8-bit swap on the low byte of the state word,
// we'll need to reconstruct the full M pointer from the bits we have. Most Ms
// are allocated on the heap, and have a known alignment and base offset. (The
// offset is due to mallocgc's allocation headers.) The main program thread uses
// a static M value, m0. We check for m0 specifically and add a known offset
// otherwise.
const (
active_spin = 4 // referenced in proc.go for sync.Mutex implementation
active_spin_cnt = 30 // referenced in proc.go for sync.Mutex implementation
)
const (
mutexLocked = 0x001
mutexSleeping = 0x002
mutexSpinning = 0x100
mutexStackLocked = 0x200
mutexMMask = 0x3FF
mutexMOffset = mallocHeaderSize // alignment of heap-allocated Ms (those other than m0)
mutexActiveSpinCount = 4
mutexActiveSpinSize = 30
mutexPassiveSpinCount = 1
mutexTailWakePeriod = 16
)
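// An illustrative decomposition of the state word described above. These
// helper names are hypothetical (the real code below works on the raw word):
// the low byte plus the two try-lock bits are flags, and everything above
// mutexMMask is the partial pointer to the most recently parked M.
func stateLocked(v uintptr) bool      { return v&mutexLocked != 0 }
func stateSleeping(v uintptr) bool    { return v&mutexSleeping != 0 }
func stateSpinnerHeld(v uintptr) bool { return v&mutexSpinning != 0 }
func stateStackLocked(v uintptr) bool { return v&mutexStackLocked != 0 }
func stateHeadBits(v uintptr) uintptr { return v &^ mutexMMask } // see mutexWaitListHead below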
//go:nosplit
func key8(p *uintptr) *uint8 {
if goarch.BigEndian {
return &(*[8]uint8)(unsafe.Pointer(p))[goarch.PtrSize/1-1]
}
return &(*[8]uint8)(unsafe.Pointer(p))[0]
}
// mWaitList is part of the M struct, and holds the list of Ms that are waiting
// for a particular runtime.mutex.
//
// When an M is unable to immediately obtain a lock, it adds itself to the list
// of Ms waiting for the lock. It does that via this struct's next field,
// forming a singly-linked list with the mutex's key field pointing to the head
// of the list.
type mWaitList struct {
next muintptr // next m waiting for lock
}
// lockVerifyMSize confirms that we can recreate the low bits of the M pointer.
func lockVerifyMSize() {
size := roundupsize(unsafe.Sizeof(m{}), false) + mallocHeaderSize
if size&mutexMMask != 0 {
print("M structure uses sizeclass ", size, "/", hex(size), " bytes; ",
"incompatible with mutex flag mask ", hex(mutexMMask), "\n")
throw("runtime.m memory alignment too small for spinbit mutex")
}
}
// mutexWaitListHead recovers a full muintptr that was missing its low bits.
// With the exception of the static m0 value, it requires allocating runtime.m
// values in a size class with a particular minimum alignment. The 2048-byte
// size class allows recovering the full muintptr value even after overwriting
// the low 11 bits with flags. We can use those 11 bits as 3 flags and an
// atomically-swapped byte.
//
//go:nosplit
func mutexWaitListHead(v uintptr) muintptr {
if highBits := v &^ mutexMMask; highBits == 0 {
return 0
} else if m0bits := muintptr(unsafe.Pointer(&m0)); highBits == uintptr(m0bits)&^mutexMMask {
return m0bits
} else {
return muintptr(highBits + mutexMOffset)
}
}
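// packWaiter is a hypothetical helper (not part of this file) showing the
// inverse of mutexWaitListHead: lock2 below performs this packing inline when
// it pushes the current M onto the stack of waiters.
func packWaiter(mp *m, v uintptr) uintptr {
	// Keep the flag bits of the current state word, overwrite the rest with
	// the high bits of the M pointer, and record that a waiter is sleeping.
	return (uintptr(unsafe.Pointer(mp)) &^ mutexMMask) | v&mutexMMask | mutexSleeping
}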
// mutexPreferLowLatency reports if this mutex prefers low latency at the risk
// of performance collapse. If so, we can allow all waiting threads to spin on
// the state word rather than go to sleep.
//
// TODO: We could have the waiting Ms each spin on their own private cache line,
// especially if we can put a bound on the on-CPU time that would consume.
//
// TODO: If there's a small set of mutex values with special requirements, they
// could make use of a more specialized lock2/unlock2 implementation. Otherwise,
// we're constrained to what we can fit within a single uintptr with no
// additional storage on the M for each lock held.
//
//go:nosplit
func mutexPreferLowLatency(l *mutex) bool {
switch l {
default:
return false
case &sched.lock:
// We often expect sched.lock to pass quickly between Ms in a way that
// each M has unique work to do: for instance when we stop-the-world
// (bringing each P to idle) or add new netpoller-triggered work to the
// global run queue.
return true
}
}
func mutexContended(l *mutex) bool {
return atomic.Loaduintptr(&l.key) > mutexLocked
}
func lock(l *mutex) {
lockWithRank(l, getLockRank(l))
}
func lock2(l *mutex) {
gp := getg()
if gp.m.locks < 0 {
throw("runtime·lock: lock count")
}
gp.m.locks++
k8 := key8(&l.key)
// Speculative grab for lock.
v8 := atomic.Xchg8(k8, mutexLocked)
if v8&mutexLocked == 0 {
if v8&mutexSleeping != 0 {
atomic.Or8(k8, mutexSleeping)
}
return
}
semacreate(gp.m)
timer := &lockTimer{lock: l}
timer.begin()
// On uniprocessors, no point spinning.
// On multiprocessors, spin for mutexActiveSpinCount attempts.
spin := 0
if ncpu > 1 {
spin = mutexActiveSpinCount
}
var weSpin, atTail bool
v := atomic.Loaduintptr(&l.key)
tryAcquire:
for i := 0; ; i++ {
if v&mutexLocked == 0 {
if weSpin {
next := (v &^ mutexSpinning) | mutexSleeping | mutexLocked
if next&^mutexMMask == 0 {
// The fast-path Xchg8 may have cleared mutexSleeping. Fix
// the hint so unlock2 knows when to use its slow path.
next = next &^ mutexSleeping
}
if atomic.Casuintptr(&l.key, v, next) {
timer.end()
return
}
} else {
prev8 := atomic.Xchg8(k8, mutexLocked|mutexSleeping)
if prev8&mutexLocked == 0 {
timer.end()
return
}
}
v = atomic.Loaduintptr(&l.key)
continue tryAcquire
}
if !weSpin && v&mutexSpinning == 0 && atomic.Casuintptr(&l.key, v, v|mutexSpinning) {
v |= mutexSpinning
weSpin = true
}
if weSpin || atTail || mutexPreferLowLatency(l) {
if i < spin {
procyield(mutexActiveSpinSize)
v = atomic.Loaduintptr(&l.key)
continue tryAcquire
} else if i < spin+mutexPassiveSpinCount {
osyield() // TODO: Consider removing this step. See https://go.dev/issue/69268.
v = atomic.Loaduintptr(&l.key)
continue tryAcquire
}
}
// Go to sleep
if v&mutexLocked == 0 {
throw("runtime·lock: sleeping while lock is available")
}
// Store the current head of the list of sleeping Ms in our gp.m.mWaitList.next field
gp.m.mWaitList.next = mutexWaitListHead(v)
// Pack a (partial) pointer to this M with the current lock state bits
next := (uintptr(unsafe.Pointer(gp.m)) &^ mutexMMask) | v&mutexMMask | mutexSleeping
if weSpin { // If we were spinning, prepare to retire
next = next &^ mutexSpinning
}
if atomic.Casuintptr(&l.key, v, next) {
weSpin = false
// We've pushed ourselves onto the stack of waiters. Wait.
semasleep(-1)
atTail = gp.m.mWaitList.next == 0 // we were at risk of starving
i = 0
}
gp.m.mWaitList.next = 0
v = atomic.Loaduintptr(&l.key)
}
}
func unlock(l *mutex) {
unlockWithRank(l)
}
// We might not be holding a p in this code.
//
//go:nowritebarrier
func unlock2(l *mutex) {
gp := getg()
prev8 := atomic.Xchg8(key8(&l.key), 0)
if prev8&mutexLocked == 0 {
throw("unlock of unlocked lock")
}
if prev8&mutexSleeping != 0 {
unlock2Wake(l)
}
gp.m.mLockProfile.recordUnlock(l)
gp.m.locks--
if gp.m.locks < 0 {
throw("runtime·unlock: lock count")
}
if gp.m.locks == 0 && gp.preempt { // restore the preemption request in case we've cleared it in newstack
gp.stackguard0 = stackPreempt
}
}
// unlock2Wake updates the list of Ms waiting on l, waking an M if necessary.
//
//go:nowritebarrier
func unlock2Wake(l *mutex) {
v := atomic.Loaduintptr(&l.key)
// On occasion, seek out and wake the M at the bottom of the stack so it
// doesn't starve.
antiStarve := cheaprandn(mutexTailWakePeriod) == 0
if !(antiStarve || // avoiding starvation may require a wake
v&mutexSpinning == 0 || // no spinners means we must wake
mutexPreferLowLatency(l)) { // prefer waiters be awake as much as possible
return
}
for {
if v&^mutexMMask == 0 || v&mutexStackLocked != 0 {
// No waiting Ms means nothing to do.
//
// If the stack lock is unavailable, its owner would make the same
// wake decisions that we would, so there's nothing for us to do.
//
// Although: This thread may have a different call stack, which
// would result in a different entry in the mutex contention profile
// (upon completion of go.dev/issue/66999). That could lead to weird
// results if a slow critical section ends but another thread
// quickly takes the lock, finishes its own critical section,
// releases the lock, and then grabs the stack lock. That quick
// thread would then take credit (blame) for the delay that this
// slow thread caused. The alternative is to have more expensive
// atomic operations (a CAS) on the critical path of unlock2.
return
}
// Other M's are waiting for the lock.
// Obtain the stack lock, and pop off an M.
next := v | mutexStackLocked
if atomic.Casuintptr(&l.key, v, next) {
break
}
v = atomic.Loaduintptr(&l.key)
}
// We own the mutexStackLocked flag. New Ms may push themselves onto the
// stack concurrently, but we're now the only thread that can remove or
// modify the Ms that are sleeping in the list.
var committed *m // If we choose an M within the stack, we've made a promise to wake it
for {
headM := v &^ mutexMMask
flags := v & (mutexMMask &^ mutexStackLocked) // preserve low bits, but release stack lock
mp := mutexWaitListHead(v).ptr()
wakem := committed
if committed == nil {
if v&mutexSpinning == 0 || mutexPreferLowLatency(l) {
wakem = mp
}
if antiStarve {
// Wake the M at the bottom of the stack of waiters. (This is
// O(N) with the number of waiters.)
wakem = mp
prev := mp
for {
next := wakem.mWaitList.next.ptr()
if next == nil {
break
}
prev, wakem = wakem, next
}
if wakem != mp {
prev.mWaitList.next = wakem.mWaitList.next
committed = wakem
}
}
}
if wakem == mp {
headM = uintptr(mp.mWaitList.next) &^ mutexMMask
}
next := headM | flags
if atomic.Casuintptr(&l.key, v, next) {
if wakem != nil {
// Claimed an M. Wake it.
semawakeup(wakem)
}
break
}
v = atomic.Loaduintptr(&l.key)
}
}

View File

@@ -19,6 +19,10 @@ const (
active_spin_cnt = 30
)
type mWaitList struct{}
func lockVerifyMSize() {}
func mutexContended(l *mutex) bool {
return false
}

View File

@@ -43,6 +43,7 @@ const (
lockRankRoot
lockRankItab
lockRankReflectOffs
lockRankSynctest
lockRankUserArenaState
// TRACEGLOBAL
lockRankTraceBuf
@@ -116,6 +117,7 @@ var lockNames = []string{
lockRankRoot: "root",
lockRankItab: "itab",
lockRankReflectOffs: "reflectOffs",
lockRankSynctest: "synctest",
lockRankUserArenaState: "userArenaState",
lockRankTraceBuf: "traceBuf",
lockRankTraceStrings: "traceStrings",
@@ -196,6 +198,7 @@ var lockPartialOrder [][]lockRank = [][]lockRank{
lockRankRoot: {},
lockRankItab: {},
lockRankReflectOffs: {lockRankItab},
lockRankSynctest: {lockRankSysmon, lockRankScavenge, lockRankSweep, lockRankTestR, lockRankTimerSend, lockRankPollDesc, lockRankWakeableSleep, lockRankHchan, lockRankNotifyList, lockRankTimers, lockRankTimer, lockRankRoot, lockRankItab, lockRankReflectOffs},
lockRankUserArenaState: {},
lockRankTraceBuf: {lockRankSysmon, lockRankScavenge},
lockRankTraceStrings: {lockRankSysmon, lockRankScavenge, lockRankTraceBuf},
@@ -208,16 +211,16 @@ var lockPartialOrder [][]lockRank = [][]lockRank{
lockRankProfBlock: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankSweepWaiters, lockRankAssistQueue, lockRankStrongFromWeakQueue, lockRankSweep, lockRankTestR, lockRankTimerSend, lockRankExecW, lockRankCpuprof, lockRankPollDesc, lockRankWakeableSleep, lockRankHchan, lockRankAllocmR, lockRankExecR, lockRankSched, lockRankAllg, lockRankAllp, lockRankNotifyList, lockRankTimers, lockRankTimer, lockRankItab, lockRankReflectOffs, lockRankUserArenaState, lockRankTraceBuf, lockRankTraceStrings},
lockRankProfMemActive: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankSweepWaiters, lockRankAssistQueue, lockRankStrongFromWeakQueue, lockRankSweep, lockRankTestR, lockRankTimerSend, lockRankExecW, lockRankCpuprof, lockRankPollDesc, lockRankWakeableSleep, lockRankHchan, lockRankAllocmR, lockRankExecR, lockRankSched, lockRankAllg, lockRankAllp, lockRankNotifyList, lockRankTimers, lockRankTimer, lockRankItab, lockRankReflectOffs, lockRankUserArenaState, lockRankTraceBuf, lockRankTraceStrings},
lockRankProfMemFuture: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankSweepWaiters, lockRankAssistQueue, lockRankStrongFromWeakQueue, lockRankSweep, lockRankTestR, lockRankTimerSend, lockRankExecW, lockRankCpuprof, lockRankPollDesc, lockRankWakeableSleep, lockRankHchan, lockRankAllocmR, lockRankExecR, lockRankSched, lockRankAllg, lockRankAllp, lockRankNotifyList, lockRankTimers, lockRankTimer, lockRankItab, lockRankReflectOffs, lockRankUserArenaState, lockRankTraceBuf, lockRankTraceStrings, lockRankProfMemActive},
lockRankGscan: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankSweepWaiters, lockRankAssistQueue, lockRankStrongFromWeakQueue, lockRankSweep, lockRankTestR, lockRankTimerSend, lockRankExecW, lockRankCpuprof, lockRankPollDesc, lockRankWakeableSleep, lockRankHchan, lockRankAllocmR, lockRankExecR, lockRankSched, lockRankAllg, lockRankAllp, lockRankNotifyList, lockRankTimers, lockRankTimer, lockRankNetpollInit, lockRankRoot, lockRankItab, lockRankReflectOffs, lockRankUserArenaState, lockRankTraceBuf, lockRankTraceStrings, lockRankFin, lockRankSpanSetSpine, lockRankMspanSpecial, lockRankGcBitsArenas, lockRankProfInsert, lockRankProfBlock, lockRankProfMemActive, lockRankProfMemFuture},
lockRankStackpool: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankSweepWaiters, lockRankAssistQueue, lockRankStrongFromWeakQueue, lockRankSweep, lockRankTestR, lockRankTimerSend, lockRankExecW, lockRankCpuprof, lockRankPollDesc, lockRankWakeableSleep, lockRankHchan, lockRankAllocmR, lockRankExecR, lockRankSched, lockRankAllg, lockRankAllp, lockRankNotifyList, lockRankTimers, lockRankTimer, lockRankNetpollInit, lockRankRoot, lockRankItab, lockRankReflectOffs, lockRankUserArenaState, lockRankTraceBuf, lockRankTraceStrings, lockRankFin, lockRankSpanSetSpine, lockRankMspanSpecial, lockRankGcBitsArenas, lockRankProfInsert, lockRankProfBlock, lockRankProfMemActive, lockRankProfMemFuture, lockRankGscan},
lockRankStackLarge: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankSweepWaiters, lockRankAssistQueue, lockRankStrongFromWeakQueue, lockRankSweep, lockRankTestR, lockRankTimerSend, lockRankExecW, lockRankCpuprof, lockRankPollDesc, lockRankWakeableSleep, lockRankHchan, lockRankAllocmR, lockRankExecR, lockRankSched, lockRankAllg, lockRankAllp, lockRankNotifyList, lockRankTimers, lockRankTimer, lockRankNetpollInit, lockRankRoot, lockRankItab, lockRankReflectOffs, lockRankUserArenaState, lockRankTraceBuf, lockRankTraceStrings, lockRankFin, lockRankSpanSetSpine, lockRankMspanSpecial, lockRankGcBitsArenas, lockRankProfInsert, lockRankProfBlock, lockRankProfMemActive, lockRankProfMemFuture, lockRankGscan},
lockRankHchanLeaf: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankSweepWaiters, lockRankAssistQueue, lockRankStrongFromWeakQueue, lockRankSweep, lockRankTestR, lockRankTimerSend, lockRankExecW, lockRankCpuprof, lockRankPollDesc, lockRankWakeableSleep, lockRankHchan, lockRankAllocmR, lockRankExecR, lockRankSched, lockRankAllg, lockRankAllp, lockRankNotifyList, lockRankTimers, lockRankTimer, lockRankNetpollInit, lockRankRoot, lockRankItab, lockRankReflectOffs, lockRankUserArenaState, lockRankTraceBuf, lockRankTraceStrings, lockRankFin, lockRankSpanSetSpine, lockRankMspanSpecial, lockRankGcBitsArenas, lockRankProfInsert, lockRankProfBlock, lockRankProfMemActive, lockRankProfMemFuture, lockRankGscan, lockRankHchanLeaf},
lockRankWbufSpans: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankDefer, lockRankSweepWaiters, lockRankAssistQueue, lockRankStrongFromWeakQueue, lockRankSweep, lockRankTestR, lockRankTimerSend, lockRankExecW, lockRankCpuprof, lockRankPollCache, lockRankPollDesc, lockRankWakeableSleep, lockRankHchan, lockRankAllocmR, lockRankExecR, lockRankSched, lockRankAllg, lockRankAllp, lockRankNotifyList, lockRankSudog, lockRankTimers, lockRankTimer, lockRankNetpollInit, lockRankRoot, lockRankItab, lockRankReflectOffs, lockRankUserArenaState, lockRankTraceBuf, lockRankTraceStrings, lockRankFin, lockRankSpanSetSpine, lockRankMspanSpecial, lockRankGcBitsArenas, lockRankProfInsert, lockRankProfBlock, lockRankProfMemActive, lockRankProfMemFuture, lockRankGscan},
lockRankMheap: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankDefer, lockRankSweepWaiters, lockRankAssistQueue, lockRankStrongFromWeakQueue, lockRankSweep, lockRankTestR, lockRankTimerSend, lockRankExecW, lockRankCpuprof, lockRankPollCache, lockRankPollDesc, lockRankWakeableSleep, lockRankHchan, lockRankAllocmR, lockRankExecR, lockRankSched, lockRankAllg, lockRankAllp, lockRankNotifyList, lockRankSudog, lockRankTimers, lockRankTimer, lockRankNetpollInit, lockRankRoot, lockRankItab, lockRankReflectOffs, lockRankUserArenaState, lockRankTraceBuf, lockRankTraceStrings, lockRankFin, lockRankSpanSetSpine, lockRankMspanSpecial, lockRankGcBitsArenas, lockRankProfInsert, lockRankProfBlock, lockRankProfMemActive, lockRankProfMemFuture, lockRankGscan, lockRankStackpool, lockRankStackLarge, lockRankWbufSpans},
lockRankMheapSpecial: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankDefer, lockRankSweepWaiters, lockRankAssistQueue, lockRankStrongFromWeakQueue, lockRankSweep, lockRankTestR, lockRankTimerSend, lockRankExecW, lockRankCpuprof, lockRankPollCache, lockRankPollDesc, lockRankWakeableSleep, lockRankHchan, lockRankAllocmR, lockRankExecR, lockRankSched, lockRankAllg, lockRankAllp, lockRankNotifyList, lockRankSudog, lockRankTimers, lockRankTimer, lockRankNetpollInit, lockRankRoot, lockRankItab, lockRankReflectOffs, lockRankUserArenaState, lockRankTraceBuf, lockRankTraceStrings, lockRankFin, lockRankSpanSetSpine, lockRankMspanSpecial, lockRankGcBitsArenas, lockRankProfInsert, lockRankProfBlock, lockRankProfMemActive, lockRankProfMemFuture, lockRankGscan, lockRankStackpool, lockRankStackLarge, lockRankWbufSpans, lockRankMheap},
lockRankGlobalAlloc: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankDefer, lockRankSweepWaiters, lockRankAssistQueue, lockRankStrongFromWeakQueue, lockRankSweep, lockRankTestR, lockRankTimerSend, lockRankExecW, lockRankCpuprof, lockRankPollCache, lockRankPollDesc, lockRankWakeableSleep, lockRankHchan, lockRankAllocmR, lockRankExecR, lockRankSched, lockRankAllg, lockRankAllp, lockRankNotifyList, lockRankSudog, lockRankTimers, lockRankTimer, lockRankNetpollInit, lockRankRoot, lockRankItab, lockRankReflectOffs, lockRankUserArenaState, lockRankTraceBuf, lockRankTraceStrings, lockRankFin, lockRankSpanSetSpine, lockRankMspanSpecial, lockRankGcBitsArenas, lockRankProfInsert, lockRankProfBlock, lockRankProfMemActive, lockRankProfMemFuture, lockRankGscan, lockRankStackpool, lockRankStackLarge, lockRankWbufSpans, lockRankMheap, lockRankMheapSpecial},
lockRankTrace: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankDefer, lockRankSweepWaiters, lockRankAssistQueue, lockRankStrongFromWeakQueue, lockRankSweep, lockRankTestR, lockRankTimerSend, lockRankExecW, lockRankCpuprof, lockRankPollCache, lockRankPollDesc, lockRankWakeableSleep, lockRankHchan, lockRankAllocmR, lockRankExecR, lockRankSched, lockRankAllg, lockRankAllp, lockRankNotifyList, lockRankSudog, lockRankTimers, lockRankTimer, lockRankNetpollInit, lockRankRoot, lockRankItab, lockRankReflectOffs, lockRankUserArenaState, lockRankTraceBuf, lockRankTraceStrings, lockRankFin, lockRankSpanSetSpine, lockRankMspanSpecial, lockRankGcBitsArenas, lockRankProfInsert, lockRankProfBlock, lockRankProfMemActive, lockRankProfMemFuture, lockRankGscan, lockRankStackpool, lockRankStackLarge, lockRankWbufSpans, lockRankMheap},
lockRankTraceStackTab: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankDefer, lockRankSweepWaiters, lockRankAssistQueue, lockRankStrongFromWeakQueue, lockRankSweep, lockRankTestR, lockRankTimerSend, lockRankExecW, lockRankCpuprof, lockRankPollCache, lockRankPollDesc, lockRankWakeableSleep, lockRankHchan, lockRankAllocmR, lockRankExecR, lockRankSched, lockRankAllg, lockRankAllp, lockRankNotifyList, lockRankSudog, lockRankTimers, lockRankTimer, lockRankNetpollInit, lockRankRoot, lockRankItab, lockRankReflectOffs, lockRankUserArenaState, lockRankTraceBuf, lockRankTraceStrings, lockRankFin, lockRankSpanSetSpine, lockRankMspanSpecial, lockRankGcBitsArenas, lockRankProfInsert, lockRankProfBlock, lockRankProfMemActive, lockRankProfMemFuture, lockRankGscan, lockRankStackpool, lockRankStackLarge, lockRankWbufSpans, lockRankMheap, lockRankTrace},
lockRankGscan: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankSweepWaiters, lockRankAssistQueue, lockRankStrongFromWeakQueue, lockRankSweep, lockRankTestR, lockRankTimerSend, lockRankExecW, lockRankCpuprof, lockRankPollDesc, lockRankWakeableSleep, lockRankHchan, lockRankAllocmR, lockRankExecR, lockRankSched, lockRankAllg, lockRankAllp, lockRankNotifyList, lockRankTimers, lockRankTimer, lockRankNetpollInit, lockRankRoot, lockRankItab, lockRankReflectOffs, lockRankSynctest, lockRankUserArenaState, lockRankTraceBuf, lockRankTraceStrings, lockRankFin, lockRankSpanSetSpine, lockRankMspanSpecial, lockRankGcBitsArenas, lockRankProfInsert, lockRankProfBlock, lockRankProfMemActive, lockRankProfMemFuture},
lockRankStackpool: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankSweepWaiters, lockRankAssistQueue, lockRankStrongFromWeakQueue, lockRankSweep, lockRankTestR, lockRankTimerSend, lockRankExecW, lockRankCpuprof, lockRankPollDesc, lockRankWakeableSleep, lockRankHchan, lockRankAllocmR, lockRankExecR, lockRankSched, lockRankAllg, lockRankAllp, lockRankNotifyList, lockRankTimers, lockRankTimer, lockRankNetpollInit, lockRankRoot, lockRankItab, lockRankReflectOffs, lockRankSynctest, lockRankUserArenaState, lockRankTraceBuf, lockRankTraceStrings, lockRankFin, lockRankSpanSetSpine, lockRankMspanSpecial, lockRankGcBitsArenas, lockRankProfInsert, lockRankProfBlock, lockRankProfMemActive, lockRankProfMemFuture, lockRankGscan},
lockRankStackLarge: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankSweepWaiters, lockRankAssistQueue, lockRankStrongFromWeakQueue, lockRankSweep, lockRankTestR, lockRankTimerSend, lockRankExecW, lockRankCpuprof, lockRankPollDesc, lockRankWakeableSleep, lockRankHchan, lockRankAllocmR, lockRankExecR, lockRankSched, lockRankAllg, lockRankAllp, lockRankNotifyList, lockRankTimers, lockRankTimer, lockRankNetpollInit, lockRankRoot, lockRankItab, lockRankReflectOffs, lockRankSynctest, lockRankUserArenaState, lockRankTraceBuf, lockRankTraceStrings, lockRankFin, lockRankSpanSetSpine, lockRankMspanSpecial, lockRankGcBitsArenas, lockRankProfInsert, lockRankProfBlock, lockRankProfMemActive, lockRankProfMemFuture, lockRankGscan},
lockRankHchanLeaf: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankSweepWaiters, lockRankAssistQueue, lockRankStrongFromWeakQueue, lockRankSweep, lockRankTestR, lockRankTimerSend, lockRankExecW, lockRankCpuprof, lockRankPollDesc, lockRankWakeableSleep, lockRankHchan, lockRankAllocmR, lockRankExecR, lockRankSched, lockRankAllg, lockRankAllp, lockRankNotifyList, lockRankTimers, lockRankTimer, lockRankNetpollInit, lockRankRoot, lockRankItab, lockRankReflectOffs, lockRankSynctest, lockRankUserArenaState, lockRankTraceBuf, lockRankTraceStrings, lockRankFin, lockRankSpanSetSpine, lockRankMspanSpecial, lockRankGcBitsArenas, lockRankProfInsert, lockRankProfBlock, lockRankProfMemActive, lockRankProfMemFuture, lockRankGscan, lockRankHchanLeaf},
lockRankWbufSpans: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankDefer, lockRankSweepWaiters, lockRankAssistQueue, lockRankStrongFromWeakQueue, lockRankSweep, lockRankTestR, lockRankTimerSend, lockRankExecW, lockRankCpuprof, lockRankPollCache, lockRankPollDesc, lockRankWakeableSleep, lockRankHchan, lockRankAllocmR, lockRankExecR, lockRankSched, lockRankAllg, lockRankAllp, lockRankNotifyList, lockRankSudog, lockRankTimers, lockRankTimer, lockRankNetpollInit, lockRankRoot, lockRankItab, lockRankReflectOffs, lockRankSynctest, lockRankUserArenaState, lockRankTraceBuf, lockRankTraceStrings, lockRankFin, lockRankSpanSetSpine, lockRankMspanSpecial, lockRankGcBitsArenas, lockRankProfInsert, lockRankProfBlock, lockRankProfMemActive, lockRankProfMemFuture, lockRankGscan},
lockRankMheap: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankDefer, lockRankSweepWaiters, lockRankAssistQueue, lockRankStrongFromWeakQueue, lockRankSweep, lockRankTestR, lockRankTimerSend, lockRankExecW, lockRankCpuprof, lockRankPollCache, lockRankPollDesc, lockRankWakeableSleep, lockRankHchan, lockRankAllocmR, lockRankExecR, lockRankSched, lockRankAllg, lockRankAllp, lockRankNotifyList, lockRankSudog, lockRankTimers, lockRankTimer, lockRankNetpollInit, lockRankRoot, lockRankItab, lockRankReflectOffs, lockRankSynctest, lockRankUserArenaState, lockRankTraceBuf, lockRankTraceStrings, lockRankFin, lockRankSpanSetSpine, lockRankMspanSpecial, lockRankGcBitsArenas, lockRankProfInsert, lockRankProfBlock, lockRankProfMemActive, lockRankProfMemFuture, lockRankGscan, lockRankStackpool, lockRankStackLarge, lockRankWbufSpans},
lockRankMheapSpecial: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankDefer, lockRankSweepWaiters, lockRankAssistQueue, lockRankStrongFromWeakQueue, lockRankSweep, lockRankTestR, lockRankTimerSend, lockRankExecW, lockRankCpuprof, lockRankPollCache, lockRankPollDesc, lockRankWakeableSleep, lockRankHchan, lockRankAllocmR, lockRankExecR, lockRankSched, lockRankAllg, lockRankAllp, lockRankNotifyList, lockRankSudog, lockRankTimers, lockRankTimer, lockRankNetpollInit, lockRankRoot, lockRankItab, lockRankReflectOffs, lockRankSynctest, lockRankUserArenaState, lockRankTraceBuf, lockRankTraceStrings, lockRankFin, lockRankSpanSetSpine, lockRankMspanSpecial, lockRankGcBitsArenas, lockRankProfInsert, lockRankProfBlock, lockRankProfMemActive, lockRankProfMemFuture, lockRankGscan, lockRankStackpool, lockRankStackLarge, lockRankWbufSpans, lockRankMheap},
lockRankGlobalAlloc: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankDefer, lockRankSweepWaiters, lockRankAssistQueue, lockRankStrongFromWeakQueue, lockRankSweep, lockRankTestR, lockRankTimerSend, lockRankExecW, lockRankCpuprof, lockRankPollCache, lockRankPollDesc, lockRankWakeableSleep, lockRankHchan, lockRankAllocmR, lockRankExecR, lockRankSched, lockRankAllg, lockRankAllp, lockRankNotifyList, lockRankSudog, lockRankTimers, lockRankTimer, lockRankNetpollInit, lockRankRoot, lockRankItab, lockRankReflectOffs, lockRankSynctest, lockRankUserArenaState, lockRankTraceBuf, lockRankTraceStrings, lockRankFin, lockRankSpanSetSpine, lockRankMspanSpecial, lockRankGcBitsArenas, lockRankProfInsert, lockRankProfBlock, lockRankProfMemActive, lockRankProfMemFuture, lockRankGscan, lockRankStackpool, lockRankStackLarge, lockRankWbufSpans, lockRankMheap, lockRankMheapSpecial},
lockRankTrace: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankDefer, lockRankSweepWaiters, lockRankAssistQueue, lockRankStrongFromWeakQueue, lockRankSweep, lockRankTestR, lockRankTimerSend, lockRankExecW, lockRankCpuprof, lockRankPollCache, lockRankPollDesc, lockRankWakeableSleep, lockRankHchan, lockRankAllocmR, lockRankExecR, lockRankSched, lockRankAllg, lockRankAllp, lockRankNotifyList, lockRankSudog, lockRankTimers, lockRankTimer, lockRankNetpollInit, lockRankRoot, lockRankItab, lockRankReflectOffs, lockRankSynctest, lockRankUserArenaState, lockRankTraceBuf, lockRankTraceStrings, lockRankFin, lockRankSpanSetSpine, lockRankMspanSpecial, lockRankGcBitsArenas, lockRankProfInsert, lockRankProfBlock, lockRankProfMemActive, lockRankProfMemFuture, lockRankGscan, lockRankStackpool, lockRankStackLarge, lockRankWbufSpans, lockRankMheap},
lockRankTraceStackTab: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankDefer, lockRankSweepWaiters, lockRankAssistQueue, lockRankStrongFromWeakQueue, lockRankSweep, lockRankTestR, lockRankTimerSend, lockRankExecW, lockRankCpuprof, lockRankPollCache, lockRankPollDesc, lockRankWakeableSleep, lockRankHchan, lockRankAllocmR, lockRankExecR, lockRankSched, lockRankAllg, lockRankAllp, lockRankNotifyList, lockRankSudog, lockRankTimers, lockRankTimer, lockRankNetpollInit, lockRankRoot, lockRankItab, lockRankReflectOffs, lockRankSynctest, lockRankUserArenaState, lockRankTraceBuf, lockRankTraceStrings, lockRankFin, lockRankSpanSetSpine, lockRankMspanSpecial, lockRankGcBitsArenas, lockRankProfInsert, lockRankProfBlock, lockRankProfMemActive, lockRankProfMemFuture, lockRankGscan, lockRankStackpool, lockRankStackLarge, lockRankWbufSpans, lockRankMheap, lockRankTrace},
lockRankPanic: {},
lockRankDeadlock: {lockRankPanic, lockRankDeadlock},
lockRankRaceFini: {lockRankPanic},

File diff suppressed because it is too large

View File

@@ -7,6 +7,7 @@ package runtime_test
import (
"flag"
"fmt"
"internal/asan"
"internal/race"
"internal/testenv"
"os"
@@ -157,6 +158,9 @@ func TestTinyAlloc(t *testing.T) {
if runtime.Raceenabled {
t.Skip("tinyalloc suppressed when running in race mode")
}
if asan.Enabled {
t.Skip("tinyalloc suppressed when running in asan mode due to redzone")
}
const N = 16
var v [N]unsafe.Pointer
for i := range v {
@@ -182,6 +186,9 @@ func TestTinyAllocIssue37262(t *testing.T) {
if runtime.Raceenabled {
t.Skip("tinyalloc suppressed when running in race mode")
}
if asan.Enabled {
t.Skip("tinyalloc suppressed when running in asan mode due to redzone")
}
// Try to cause an alignment access fault
// by atomically accessing the first 64-bit
// value of a tiny-allocated object.

View File

@@ -5,13 +5,20 @@
package runtime_test
import (
"encoding/binary"
"flag"
"fmt"
"math/rand"
"runtime"
"slices"
"strconv"
"strings"
"testing"
"unsafe"
)
var mapbench = flag.Bool("mapbench", false, "enable the full set of map benchmark variants")
const size = 10
func BenchmarkHashStringSpeed(b *testing.B) {
@@ -189,10 +196,12 @@ func BenchmarkSmallStrMap(b *testing.B) {
}
}
func BenchmarkMapStringKeysEight_16(b *testing.B) { benchmarkMapStringKeysEight(b, 16) }
func BenchmarkMapStringKeysEight_32(b *testing.B) { benchmarkMapStringKeysEight(b, 32) }
func BenchmarkMapStringKeysEight_64(b *testing.B) { benchmarkMapStringKeysEight(b, 64) }
func BenchmarkMapStringKeysEight_1M(b *testing.B) { benchmarkMapStringKeysEight(b, 1<<20) }
func BenchmarkMapStringKeysEight_16(b *testing.B) { benchmarkMapStringKeysEight(b, 16) }
func BenchmarkMapStringKeysEight_32(b *testing.B) { benchmarkMapStringKeysEight(b, 32) }
func BenchmarkMapStringKeysEight_64(b *testing.B) { benchmarkMapStringKeysEight(b, 64) }
func BenchmarkMapStringKeysEight_128(b *testing.B) { benchmarkMapStringKeysEight(b, 128) }
func BenchmarkMapStringKeysEight_256(b *testing.B) { benchmarkMapStringKeysEight(b, 256) }
func BenchmarkMapStringKeysEight_1M(b *testing.B) { benchmarkMapStringKeysEight(b, 1<<20) }
func benchmarkMapStringKeysEight(b *testing.B, keySize int) {
m := make(map[string]bool)
@@ -206,17 +215,6 @@ func benchmarkMapStringKeysEight(b *testing.B, keySize int) {
}
}
func BenchmarkIntMap(b *testing.B) {
m := make(map[int]bool)
for i := 0; i < 8; i++ {
m[i] = true
}
b.ResetTimer()
for i := 0; i < b.N; i++ {
_, _ = m[7]
}
}
func BenchmarkMapFirst(b *testing.B) {
for n := 1; n <= 16; n++ {
b.Run(fmt.Sprintf("%d", n), func(b *testing.B) {
@@ -260,12 +258,41 @@ func BenchmarkMapLast(b *testing.B) {
}
}
func cyclicPermutation(n int) []int {
// From https://crypto.stackexchange.com/questions/51787/creating-single-cycle-permutations
p := rand.New(rand.NewSource(1)).Perm(n)
inc := make([]int, n)
pInv := make([]int, n)
for i := 0; i < n; i++ {
inc[i] = (i + 1) % n
pInv[p[i]] = i
}
res := make([]int, n)
for i := 0; i < n; i++ {
res[i] = pInv[inc[p[i]]]
}
// Test result.
j := 0
for i := 0; i < n-1; i++ {
j = res[j]
if j == 0 {
panic("got back to 0 too early")
}
}
j = res[j]
if j != 0 {
panic("didn't get back to 0")
}
return res
}
func BenchmarkMapCycle(b *testing.B) {
// Arrange map entries to be a permutation, so that
// we hit all entries, and one lookup is data dependent
// on the previous lookup.
const N = 3127
p := rand.New(rand.NewSource(1)).Perm(N)
p := cyclicPermutation(N)
m := map[int]int{}
for i := 0; i < N; i++ {
m[i] = p[i]
@@ -333,27 +360,6 @@ func BenchmarkNewSmallMap(b *testing.B) {
}
}
func BenchmarkMapIter(b *testing.B) {
m := make(map[int]bool)
for i := 0; i < 8; i++ {
m[i] = true
}
b.ResetTimer()
for i := 0; i < b.N; i++ {
for range m {
}
}
}
func BenchmarkMapIterEmpty(b *testing.B) {
m := make(map[int]bool)
b.ResetTimer()
for i := 0; i < b.N; i++ {
for range m {
}
}
}
func BenchmarkSameLengthMap(b *testing.B) {
// long strings, same length, differ in first few
// and last few bytes.
@@ -368,28 +374,6 @@ func BenchmarkSameLengthMap(b *testing.B) {
}
}
type BigKey [3]int64
func BenchmarkBigKeyMap(b *testing.B) {
m := make(map[BigKey]bool)
k := BigKey{3, 4, 5}
m[k] = true
for i := 0; i < b.N; i++ {
_ = m[k]
}
}
type BigVal [3]int64
func BenchmarkBigValMap(b *testing.B) {
m := make(map[BigKey]BigVal)
k := BigKey{3, 4, 5}
m[k] = BigVal{6, 7, 8}
for i := 0; i < b.N; i++ {
_ = m[k]
}
}
func BenchmarkSmallKeyMap(b *testing.B) {
m := make(map[int16]bool)
m[5] = true
@@ -538,3 +522,669 @@ func BenchmarkNewEmptyMapHintGreaterThan8(b *testing.B) {
_ = make(map[int]int, hintGreaterThan8)
}
}
func benchSizes(f func(b *testing.B, n int)) func(*testing.B) {
var cases = []int{
0,
6,
12,
18,
24,
30,
64,
128,
256,
512,
1024,
2048,
4096,
8192,
1 << 16,
1 << 18,
1 << 20,
1 << 22,
}
// Cases enabled by default. Set -mapbench for the remainder.
//
// With the other type combinations, there are literally thousands of
// variations. It takes too long to run all of these as part of
// builders.
byDefault := map[int]bool{
6: true,
64: true,
1 << 16: true,
}
return func(b *testing.B) {
for _, n := range cases {
b.Run("len="+strconv.Itoa(n), func(b *testing.B) {
if !*mapbench && !byDefault[n] {
b.Skip("Skipped because -mapbench=false")
}
f(b, n)
})
}
}
}
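// Assumed invocation (not part of this file): the full size/type matrix is
// opt-in, for example
//
//	go test -run=NONE -bench=BenchmarkMapIter -mapbench
//
// while a default run covers only the len=6, len=64, and len=65536 cases
// listed in byDefault above.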
func smallBenchSizes(f func(b *testing.B, n int)) func(*testing.B) {
return func(b *testing.B) {
for n := 1; n <= 8; n++ {
b.Run("len="+strconv.Itoa(n), func(b *testing.B) {
f(b, n)
})
}
}
}
// A 16 byte type.
type smallType [16]byte
// A 512 byte type.
type mediumType [1 << 9]byte
// A 4KiB type.
type bigType [1 << 12]byte
type mapBenchmarkKeyType interface {
int32 | int64 | string | smallType | mediumType | bigType | *int32
}
type mapBenchmarkElemType interface {
mapBenchmarkKeyType | []int32
}
func genIntValues[T int | int32 | int64](start, end int) []T {
vals := make([]T, 0, end-start)
for i := start; i < end; i++ {
vals = append(vals, T(i))
}
return vals
}
func genStringValues(start, end int) []string {
vals := make([]string, 0, end-start)
for i := start; i < end; i++ {
vals = append(vals, strconv.Itoa(i))
}
return vals
}
func genSmallValues(start, end int) []smallType {
vals := make([]smallType, 0, end-start)
for i := start; i < end; i++ {
var v smallType
binary.NativeEndian.PutUint64(v[:], uint64(i))
vals = append(vals, v)
}
return vals
}
func genMediumValues(start, end int) []mediumType {
vals := make([]mediumType, 0, end-start)
for i := start; i < end; i++ {
var v mediumType
binary.NativeEndian.PutUint64(v[:], uint64(i))
vals = append(vals, v)
}
return vals
}
func genBigValues(start, end int) []bigType {
vals := make([]bigType, 0, end-start)
for i := start; i < end; i++ {
var v bigType
binary.NativeEndian.PutUint64(v[:], uint64(i))
vals = append(vals, v)
}
return vals
}
func genPtrValues[T any](start, end int) []*T {
// Start and end don't mean much. Each pointer by definition has a
// unique identity.
vals := make([]*T, 0, end-start)
for i := start; i < end; i++ {
v := new(T)
vals = append(vals, v)
}
return vals
}
func genIntSliceValues[T int | int32 | int64](start, end int) [][]T {
vals := make([][]T, 0, end-start)
for i := start; i < end; i++ {
vals = append(vals, []T{T(i)})
}
return vals
}
func genValues[T mapBenchmarkElemType](start, end int) []T {
var t T
switch any(t).(type) {
case int32:
return any(genIntValues[int32](start, end)).([]T)
case int64:
return any(genIntValues[int64](start, end)).([]T)
case string:
return any(genStringValues(start, end)).([]T)
case smallType:
return any(genSmallValues(start, end)).([]T)
case mediumType:
return any(genMediumValues(start, end)).([]T)
case bigType:
return any(genBigValues(start, end)).([]T)
case *int32:
return any(genPtrValues[int32](start, end)).([]T)
case []int32:
return any(genIntSliceValues[int32](start, end)).([]T)
default:
panic("unreachable")
}
}
// Avoid inlining to force a heap allocation.
//
//go:noinline
func newSink[T mapBenchmarkElemType]() *T {
return new(T)
}
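// The benchmarks below write each key/elem they touch through these heap
// sinks. Keeping the sink on the heap (hence the forced allocation above) is
// a common way to stop the compiler from proving the loop body has no
// observable effect and eliding the map work being measured.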
// Return a new map filled with keys and elems. Both slices must be the same length.
func fillMap[K mapBenchmarkKeyType, E mapBenchmarkElemType](keys []K, elems []E) map[K]E {
m := make(map[K]E, len(keys))
for i := range keys {
m[keys[i]] = elems[i]
}
return m
}
func iterCount(b *testing.B, n int) int {
// Divide b.N by n so that the ns/op reports time per element,
// not time per full map iteration. This makes benchmarks of
// different map sizes more comparable.
//
// If size is zero we still need to do iterations.
if n == 0 {
return b.N
}
return b.N / n
}
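// Worked example for iterCount (illustrative numbers, not from a real run):
// with b.N = 1_000_000 and n = 100 it returns 10_000 outer iterations; each
// outer iteration visits all 100 elements, so the timed loop performs b.N
// element visits in total and the reported ns/op is per element.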
func checkAllocSize[K, E any](b *testing.B, n int) {
var k K
size := uint64(n) * uint64(unsafe.Sizeof(k))
var e E
size += uint64(n) * uint64(unsafe.Sizeof(e))
if size >= 1<<30 {
b.Skipf("Total key+elem size %d exceeds 1GiB", size)
}
}
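// Worked example for checkAllocSize: with bigType (4 KiB) keys and bigType
// elems, the 1<<18 case would need 1<<18 * 2 * 4 KiB = 2 GiB of keys and
// elems and is skipped, while the same size with int32 keys and elems needs
// only about 2 MiB and runs.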
func benchmarkMapIter[K mapBenchmarkKeyType, E mapBenchmarkElemType](b *testing.B, n int) {
checkAllocSize[K, E](b, n)
k := genValues[K](0, n)
e := genValues[E](0, n)
m := fillMap(k, e)
iterations := iterCount(b, n)
sinkK := newSink[K]()
sinkE := newSink[E]()
b.ResetTimer()
for i := 0; i < iterations; i++ {
for k, e := range m {
*sinkK = k
*sinkE = e
}
}
}
func BenchmarkMapIter(b *testing.B) {
b.Run("Key=int32/Elem=int32", benchSizes(benchmarkMapIter[int32, int32]))
b.Run("Key=int64/Elem=int64", benchSizes(benchmarkMapIter[int64, int64]))
b.Run("Key=string/Elem=string", benchSizes(benchmarkMapIter[string, string]))
b.Run("Key=smallType/Elem=int32", benchSizes(benchmarkMapIter[smallType, int32]))
b.Run("Key=mediumType/Elem=int32", benchSizes(benchmarkMapIter[mediumType, int32]))
b.Run("Key=bigType/Elem=int32", benchSizes(benchmarkMapIter[bigType, int32]))
b.Run("Key=bigType/Elem=bigType", benchSizes(benchmarkMapIter[bigType, bigType]))
b.Run("Key=int32/Elem=bigType", benchSizes(benchmarkMapIter[int32, bigType]))
b.Run("Key=*int32/Elem=int32", benchSizes(benchmarkMapIter[*int32, int32]))
b.Run("Key=int32/Elem=*int32", benchSizes(benchmarkMapIter[int32, *int32]))
}
func benchmarkMapIterLowLoad[K mapBenchmarkKeyType, E mapBenchmarkElemType](b *testing.B, n int) {
// Only insert one entry regardless of map size.
k := genValues[K](0, 1)
e := genValues[E](0, 1)
m := make(map[K]E, n)
for i := range k {
m[k[i]] = e[i]
}
iterations := iterCount(b, n)
sinkK := newSink[K]()
sinkE := newSink[E]()
b.ResetTimer()
for i := 0; i < iterations; i++ {
for k, e := range m {
*sinkK = k
*sinkE = e
}
}
}
func BenchmarkMapIterLowLoad(b *testing.B) {
b.Run("Key=int32/Elem=int32", benchSizes(benchmarkMapIterLowLoad[int32, int32]))
b.Run("Key=int64/Elem=int64", benchSizes(benchmarkMapIterLowLoad[int64, int64]))
b.Run("Key=string/Elem=string", benchSizes(benchmarkMapIterLowLoad[string, string]))
b.Run("Key=smallType/Elem=int32", benchSizes(benchmarkMapIterLowLoad[smallType, int32]))
b.Run("Key=mediumType/Elem=int32", benchSizes(benchmarkMapIterLowLoad[mediumType, int32]))
b.Run("Key=bigType/Elem=int32", benchSizes(benchmarkMapIterLowLoad[bigType, int32]))
b.Run("Key=bigType/Elem=bigType", benchSizes(benchmarkMapIterLowLoad[bigType, bigType]))
b.Run("Key=int32/Elem=bigType", benchSizes(benchmarkMapIterLowLoad[int32, bigType]))
b.Run("Key=*int32/Elem=int32", benchSizes(benchmarkMapIterLowLoad[*int32, int32]))
b.Run("Key=int32/Elem=*int32", benchSizes(benchmarkMapIterLowLoad[int32, *int32]))
}
func benchmarkMapAccessHit[K mapBenchmarkKeyType, E mapBenchmarkElemType](b *testing.B, n int) {
if n == 0 {
b.Skip("can't access empty map")
}
checkAllocSize[K, E](b, n)
k := genValues[K](0, n)
e := genValues[E](0, n)
m := fillMap(k, e)
sink := newSink[E]()
b.ResetTimer()
for i := 0; i < b.N; i++ {
*sink = m[k[i%n]]
}
}
func BenchmarkMapAccessHit(b *testing.B) {
b.Run("Key=int32/Elem=int32", benchSizes(benchmarkMapAccessHit[int32, int32]))
b.Run("Key=int64/Elem=int64", benchSizes(benchmarkMapAccessHit[int64, int64]))
b.Run("Key=string/Elem=string", benchSizes(benchmarkMapAccessHit[string, string]))
b.Run("Key=smallType/Elem=int32", benchSizes(benchmarkMapAccessHit[smallType, int32]))
b.Run("Key=mediumType/Elem=int32", benchSizes(benchmarkMapAccessHit[mediumType, int32]))
b.Run("Key=bigType/Elem=int32", benchSizes(benchmarkMapAccessHit[bigType, int32]))
b.Run("Key=bigType/Elem=bigType", benchSizes(benchmarkMapAccessHit[bigType, bigType]))
b.Run("Key=int32/Elem=bigType", benchSizes(benchmarkMapAccessHit[int32, bigType]))
b.Run("Key=*int32/Elem=int32", benchSizes(benchmarkMapAccessHit[*int32, int32]))
b.Run("Key=int32/Elem=*int32", benchSizes(benchmarkMapAccessHit[int32, *int32]))
}
var sinkOK bool
func benchmarkMapAccessMiss[K mapBenchmarkKeyType, E mapBenchmarkElemType](b *testing.B, n int) {
checkAllocSize[K, E](b, n)
k := genValues[K](0, n)
e := genValues[E](0, n)
m := fillMap(k, e)
if n == 0 { // Create lookup values for empty maps.
n = 1
}
w := genValues[K](n, 2*n)
b.ResetTimer()
var ok bool
for i := 0; i < b.N; i++ {
_, ok = m[w[i%n]]
}
sinkOK = ok
}
func BenchmarkMapAccessMiss(b *testing.B) {
b.Run("Key=int32/Elem=int32", benchSizes(benchmarkMapAccessMiss[int32, int32]))
b.Run("Key=int64/Elem=int64", benchSizes(benchmarkMapAccessMiss[int64, int64]))
b.Run("Key=string/Elem=string", benchSizes(benchmarkMapAccessMiss[string, string]))
b.Run("Key=smallType/Elem=int32", benchSizes(benchmarkMapAccessMiss[smallType, int32]))
b.Run("Key=mediumType/Elem=int32", benchSizes(benchmarkMapAccessMiss[mediumType, int32]))
b.Run("Key=bigType/Elem=int32", benchSizes(benchmarkMapAccessMiss[bigType, int32]))
b.Run("Key=bigType/Elem=bigType", benchSizes(benchmarkMapAccessMiss[bigType, bigType]))
b.Run("Key=int32/Elem=bigType", benchSizes(benchmarkMapAccessMiss[int32, bigType]))
b.Run("Key=*int32/Elem=int32", benchSizes(benchmarkMapAccessMiss[*int32, int32]))
b.Run("Key=int32/Elem=*int32", benchSizes(benchmarkMapAccessMiss[int32, *int32]))
}
// Assign to a key that already exists.
func benchmarkMapAssignExists[K mapBenchmarkKeyType, E mapBenchmarkElemType](b *testing.B, n int) {
if n == 0 {
b.Skip("can't assign to existing keys in empty map")
}
checkAllocSize[K, E](b, n)
k := genValues[K](0, n)
e := genValues[E](0, n)
m := fillMap(k, e)
b.ResetTimer()
for i := 0; i < b.N; i++ {
m[k[i%n]] = e[i%n]
}
}
func BenchmarkMapAssignExists(b *testing.B) {
b.Run("Key=int32/Elem=int32", benchSizes(benchmarkMapAssignExists[int32, int32]))
b.Run("Key=int64/Elem=int64", benchSizes(benchmarkMapAssignExists[int64, int64]))
b.Run("Key=string/Elem=string", benchSizes(benchmarkMapAssignExists[string, string]))
b.Run("Key=smallType/Elem=int32", benchSizes(benchmarkMapAssignExists[smallType, int32]))
b.Run("Key=mediumType/Elem=int32", benchSizes(benchmarkMapAssignExists[mediumType, int32]))
b.Run("Key=bigType/Elem=int32", benchSizes(benchmarkMapAssignExists[bigType, int32]))
b.Run("Key=bigType/Elem=bigType", benchSizes(benchmarkMapAssignExists[bigType, bigType]))
b.Run("Key=int32/Elem=bigType", benchSizes(benchmarkMapAssignExists[int32, bigType]))
b.Run("Key=*int32/Elem=int32", benchSizes(benchmarkMapAssignExists[*int32, int32]))
b.Run("Key=int32/Elem=*int32", benchSizes(benchmarkMapAssignExists[int32, *int32]))
}
// Fill a map of size n with no hint. Time is per-key. A new map is created
// every n assignments.
//
// TODO(prattmic): Results don't make much sense if b.N < n.
// TODO(prattmic): Measure distribution of assign time to reveal the grow
// latency.
func benchmarkMapAssignFillNoHint[K mapBenchmarkKeyType, E mapBenchmarkElemType](b *testing.B, n int) {
if n == 0 {
b.Skip("can't create empty map via assignment")
}
checkAllocSize[K, E](b, n)
k := genValues[K](0, n)
e := genValues[E](0, n)
b.ResetTimer()
var m map[K]E
for i := 0; i < b.N; i++ {
if i%n == 0 {
m = make(map[K]E)
}
m[k[i%n]] = e[i%n]
}
}
func BenchmarkMapAssignFillNoHint(b *testing.B) {
b.Run("Key=int32/Elem=int32", benchSizes(benchmarkMapAssignFillNoHint[int32, int32]))
b.Run("Key=int64/Elem=int64", benchSizes(benchmarkMapAssignFillNoHint[int64, int64]))
b.Run("Key=string/Elem=string", benchSizes(benchmarkMapAssignFillNoHint[string, string]))
b.Run("Key=smallType/Elem=int32", benchSizes(benchmarkMapAssignFillNoHint[smallType, int32]))
b.Run("Key=mediumType/Elem=int32", benchSizes(benchmarkMapAssignFillNoHint[mediumType, int32]))
b.Run("Key=bigType/Elem=int32", benchSizes(benchmarkMapAssignFillNoHint[bigType, int32]))
b.Run("Key=bigType/Elem=bigType", benchSizes(benchmarkMapAssignFillNoHint[bigType, bigType]))
b.Run("Key=int32/Elem=bigType", benchSizes(benchmarkMapAssignFillNoHint[int32, bigType]))
b.Run("Key=*int32/Elem=int32", benchSizes(benchmarkMapAssignFillNoHint[*int32, int32]))
b.Run("Key=int32/Elem=*int32", benchSizes(benchmarkMapAssignFillNoHint[int32, *int32]))
}
// Identical to benchmarkMapAssignFillNoHint, but additionally measures the
// latency of each mapassign to report tail latency due to map grow.
func benchmarkMapAssignGrowLatency[K mapBenchmarkKeyType, E mapBenchmarkElemType](b *testing.B, n int) {
if n == 0 {
b.Skip("can't create empty map via assignment")
}
checkAllocSize[K, E](b, n)
k := genValues[K](0, n)
e := genValues[E](0, n)
// Store the run time of each mapassign. Keeping the full data rather
// than a histogram provides higher precision. b.N tends to be <10M, so
// the memory requirement isn't too bad.
sample := make([]int64, b.N)
b.ResetTimer()
var m map[K]E
for i := 0; i < b.N; i++ {
if i%n == 0 {
m = make(map[K]E)
}
start := runtime.Nanotime()
m[k[i%n]] = e[i%n]
end := runtime.Nanotime()
sample[i] = end - start
}
b.StopTimer()
slices.Sort(sample)
// TODO(prattmic): Grow is so rare that even p99.99 often doesn't
// display a grow case. Switch to a more direct measure of grow cases
// only?
b.ReportMetric(float64(sample[int(float64(len(sample))*0.5)]), "p50-ns/op")
b.ReportMetric(float64(sample[int(float64(len(sample))*0.99)]), "p99-ns/op")
b.ReportMetric(float64(sample[int(float64(len(sample))*0.999)]), "p99.9-ns/op")
b.ReportMetric(float64(sample[int(float64(len(sample))*0.9999)]), "p99.99-ns/op")
b.ReportMetric(float64(sample[len(sample)-1]), "p100-ns/op")
}
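// A minimal sketch equivalent to the inline quantile indexing above; the
// benchmark itself does not define this helper, it is shown only to make the
// indexing explicit: for an ascending sorted sample, index
// int(float64(len(sample))*q) selects the value at quantile q.
func percentileNs(sorted []int64, q float64) float64 {
	// q must be in [0, 1); e.g. q = 0.5 with an ascending sample of
	// length 10 reads sorted[5].
	return float64(sorted[int(float64(len(sorted))*q)])
}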
func BenchmarkMapAssignGrowLatency(b *testing.B) {
b.Run("Key=int32/Elem=int32", benchSizes(benchmarkMapAssignGrowLatency[int32, int32]))
b.Run("Key=int64/Elem=int64", benchSizes(benchmarkMapAssignGrowLatency[int64, int64]))
b.Run("Key=string/Elem=string", benchSizes(benchmarkMapAssignGrowLatency[string, string]))
b.Run("Key=smallType/Elem=int32", benchSizes(benchmarkMapAssignGrowLatency[smallType, int32]))
b.Run("Key=mediumType/Elem=int32", benchSizes(benchmarkMapAssignGrowLatency[mediumType, int32]))
b.Run("Key=bigType/Elem=int32", benchSizes(benchmarkMapAssignGrowLatency[bigType, int32]))
b.Run("Key=bigType/Elem=bigType", benchSizes(benchmarkMapAssignGrowLatency[bigType, bigType]))
b.Run("Key=int32/Elem=bigType", benchSizes(benchmarkMapAssignGrowLatency[int32, bigType]))
b.Run("Key=*int32/Elem=int32", benchSizes(benchmarkMapAssignGrowLatency[*int32, int32]))
b.Run("Key=int32/Elem=*int32", benchSizes(benchmarkMapAssignGrowLatency[int32, *int32]))
}
// Fill a map of size n with size hint. Time is per-key. A new map is created
// every n assignments.
//
// TODO(prattmic): Results don't make much sense if b.N < n.
func benchmarkMapAssignFillHint[K mapBenchmarkKeyType, E mapBenchmarkElemType](b *testing.B, n int) {
if n == 0 {
b.Skip("can't create empty map via assignment")
}
checkAllocSize[K, E](b, n)
k := genValues[K](0, n)
e := genValues[E](0, n)
b.ResetTimer()
var m map[K]E
for i := 0; i < b.N; i++ {
if i%n == 0 {
m = make(map[K]E, n)
}
m[k[i%n]] = e[i%n]
}
}
func BenchmarkMapAssignFillHint(b *testing.B) {
b.Run("Key=int32/Elem=int32", benchSizes(benchmarkMapAssignFillHint[int32, int32]))
b.Run("Key=int64/Elem=int64", benchSizes(benchmarkMapAssignFillHint[int64, int64]))
b.Run("Key=string/Elem=string", benchSizes(benchmarkMapAssignFillHint[string, string]))
b.Run("Key=smallType/Elem=int32", benchSizes(benchmarkMapAssignFillHint[smallType, int32]))
b.Run("Key=mediumType/Elem=int32", benchSizes(benchmarkMapAssignFillHint[mediumType, int32]))
b.Run("Key=bigType/Elem=int32", benchSizes(benchmarkMapAssignFillHint[bigType, int32]))
b.Run("Key=bigType/Elem=bigType", benchSizes(benchmarkMapAssignFillHint[bigType, bigType]))
b.Run("Key=int32/Elem=bigType", benchSizes(benchmarkMapAssignFillHint[int32, bigType]))
b.Run("Key=*int32/Elem=int32", benchSizes(benchmarkMapAssignFillHint[*int32, int32]))
b.Run("Key=int32/Elem=*int32", benchSizes(benchmarkMapAssignFillHint[int32, *int32]))
}
// Fill a map of size n, reusing the same map. Time is per-key. The map is
// cleared every n assignments.
//
// TODO(prattmic): Results don't make much sense if b.N < n.
func benchmarkMapAssignFillClear[K mapBenchmarkKeyType, E mapBenchmarkElemType](b *testing.B, n int) {
if n == 0 {
b.Skip("can't create empty map via assignment")
}
checkAllocSize[K, E](b, n)
k := genValues[K](0, n)
e := genValues[E](0, n)
m := fillMap(k, e)
b.ResetTimer()
for i := 0; i < b.N; i++ {
if i%n == 0 {
clear(m)
}
m[k[i%n]] = e[i%n]
}
}
func BenchmarkMapAssignFillClear(b *testing.B) {
b.Run("Key=int32/Elem=int32", benchSizes(benchmarkMapAssignFillClear[int32, int32]))
b.Run("Key=int64/Elem=int64", benchSizes(benchmarkMapAssignFillClear[int64, int64]))
b.Run("Key=string/Elem=string", benchSizes(benchmarkMapAssignFillClear[string, string]))
b.Run("Key=smallType/Elem=int32", benchSizes(benchmarkMapAssignFillClear[smallType, int32]))
b.Run("Key=mediumType/Elem=int32", benchSizes(benchmarkMapAssignFillClear[mediumType, int32]))
b.Run("Key=bigType/Elem=int32", benchSizes(benchmarkMapAssignFillClear[bigType, int32]))
b.Run("Key=bigType/Elem=bigType", benchSizes(benchmarkMapAssignFillClear[bigType, bigType]))
b.Run("Key=int32/Elem=bigType", benchSizes(benchmarkMapAssignFillClear[int32, bigType]))
b.Run("Key=*int32/Elem=int32", benchSizes(benchmarkMapAssignFillClear[*int32, int32]))
b.Run("Key=int32/Elem=*int32", benchSizes(benchmarkMapAssignFillClear[int32, *int32]))
}
// Modify values using +=.
func benchmarkMapAssignAddition[K mapBenchmarkKeyType, E int32 | int64 | string](b *testing.B, n int) {
if n == 0 {
b.Skip("can't modify empty map via assignment")
}
checkAllocSize[K, E](b, n)
k := genValues[K](0, n)
e := genValues[E](0, n)
m := fillMap(k, e)
b.ResetTimer()
for i := 0; i < b.N; i++ {
m[k[i%n]] += e[i%n]
}
}
func BenchmarkMapAssignAddition(b *testing.B) {
b.Run("Key=int32/Elem=int32", benchSizes(benchmarkMapAssignAddition[int32, int32]))
b.Run("Key=int64/Elem=int64", benchSizes(benchmarkMapAssignAddition[int64, int64]))
b.Run("Key=string/Elem=string", benchSizes(benchmarkMapAssignAddition[string, string]))
b.Run("Key=smallType/Elem=int32", benchSizes(benchmarkMapAssignAddition[smallType, int32]))
b.Run("Key=mediumType/Elem=int32", benchSizes(benchmarkMapAssignAddition[mediumType, int32]))
b.Run("Key=bigType/Elem=int32", benchSizes(benchmarkMapAssignAddition[bigType, int32]))
}
// Modify values using append.
func benchmarkMapAssignAppend[K mapBenchmarkKeyType](b *testing.B, n int) {
if n == 0 {
b.Skip("can't modify empty map via append")
}
checkAllocSize[K, []int32](b, n)
k := genValues[K](0, n)
e := genValues[[]int32](0, n)
m := fillMap(k, e)
b.ResetTimer()
for i := 0; i < b.N; i++ {
m[k[i%n]] = append(m[k[i%n]], e[i%n][0])
}
}
func BenchmarkMapAssignAppend(b *testing.B) {
b.Run("Key=int32/Elem=[]int32", benchSizes(benchmarkMapAssignAppend[int32]))
b.Run("Key=int64/Elem=[]int32", benchSizes(benchmarkMapAssignAppend[int64]))
b.Run("Key=string/Elem=[]int32", benchSizes(benchmarkMapAssignAppend[string]))
}
func benchmarkMapDelete[K mapBenchmarkKeyType, E mapBenchmarkElemType](b *testing.B, n int) {
if n == 0 {
b.Skip("can't delete from empty map")
}
checkAllocSize[K, E](b, n)
k := genValues[K](0, n)
e := genValues[E](0, n)
m := fillMap(k, e)
b.ResetTimer()
for i := 0; i < b.N; i++ {
if len(m) == 0 {
// We'd like to StopTimer while refilling the map, but
// it is way too expensive and thus makes the benchmark
// take a long time. See https://go.dev/issue/20875.
for j := range k {
m[k[j]] = e[j]
}
}
delete(m, k[i%n])
}
}
func BenchmarkMapDelete(b *testing.B) {
b.Run("Key=int32/Elem=int32", benchSizes(benchmarkMapDelete[int32, int32]))
b.Run("Key=int64/Elem=int64", benchSizes(benchmarkMapDelete[int64, int64]))
b.Run("Key=string/Elem=string", benchSizes(benchmarkMapDelete[string, string]))
b.Run("Key=smallType/Elem=int32", benchSizes(benchmarkMapDelete[smallType, int32]))
b.Run("Key=mediumType/Elem=int32", benchSizes(benchmarkMapDelete[mediumType, int32]))
b.Run("Key=bigType/Elem=int32", benchSizes(benchmarkMapDelete[bigType, int32]))
b.Run("Key=bigType/Elem=bigType", benchSizes(benchmarkMapDelete[bigType, bigType]))
b.Run("Key=int32/Elem=bigType", benchSizes(benchmarkMapDelete[int32, bigType]))
b.Run("Key=*int32/Elem=int32", benchSizes(benchmarkMapDelete[*int32, int32]))
b.Run("Key=int32/Elem=*int32", benchSizes(benchmarkMapDelete[int32, *int32]))
}
// Use iterator to pop an element. We want this to be fast, see
// https://go.dev/issue/8412.
func benchmarkMapPop[K mapBenchmarkKeyType, E mapBenchmarkElemType](b *testing.B, n int) {
if n == 0 {
b.Skip("can't delete from empty map")
}
checkAllocSize[K, E](b, n)
k := genValues[K](0, n)
e := genValues[E](0, n)
m := fillMap(k, e)
b.ResetTimer()
for i := 0; i < b.N; i++ {
if len(m) == 0 {
// We'd like to StopTimer while refilling the map, but
// it is way too expensive and thus makes the benchmark
// take a long time. See https://go.dev/issue/20875.
for j := range k {
m[k[j]] = e[j]
}
}
for key := range m {
delete(m, key)
break
}
}
}
func BenchmarkMapPop(b *testing.B) {
b.Run("Key=int32/Elem=int32", benchSizes(benchmarkMapPop[int32, int32]))
b.Run("Key=int64/Elem=int64", benchSizes(benchmarkMapPop[int64, int64]))
b.Run("Key=string/Elem=string", benchSizes(benchmarkMapPop[string, string]))
b.Run("Key=smallType/Elem=int32", benchSizes(benchmarkMapPop[smallType, int32]))
b.Run("Key=mediumType/Elem=int32", benchSizes(benchmarkMapPop[mediumType, int32]))
b.Run("Key=bigType/Elem=int32", benchSizes(benchmarkMapPop[bigType, int32]))
b.Run("Key=bigType/Elem=bigType", benchSizes(benchmarkMapPop[bigType, bigType]))
b.Run("Key=int32/Elem=bigType", benchSizes(benchmarkMapPop[int32, bigType]))
b.Run("Key=*int32/Elem=int32", benchSizes(benchmarkMapPop[*int32, int32]))
b.Run("Key=int32/Elem=*int32", benchSizes(benchmarkMapPop[int32, *int32]))
}
func BenchmarkMapDeleteLargeKey(b *testing.B) {
m := map[string]int{}
for i := range 9 {
m[fmt.Sprintf("%d", i)] = i
}
key := strings.Repeat("*", 10000)
for range b.N {
delete(m, key)
}
}
func BenchmarkMapSmallAccessHit(b *testing.B) {
b.Run("Key=int32/Elem=int32", smallBenchSizes(benchmarkMapAccessHit[int32, int32]))
b.Run("Key=int64/Elem=int64", smallBenchSizes(benchmarkMapAccessHit[int64, int64]))
b.Run("Key=string/Elem=string", smallBenchSizes(benchmarkMapAccessHit[string, string]))
}
func BenchmarkMapSmallAccessMiss(b *testing.B) {
b.Run("Key=int32/Elem=int32", smallBenchSizes(benchmarkMapAccessMiss[int32, int32]))
b.Run("Key=int64/Elem=int64", smallBenchSizes(benchmarkMapAccessMiss[int64, int64]))
b.Run("Key=string/Elem=string", smallBenchSizes(benchmarkMapAccessMiss[string, string]))
}

View File

@@ -2,17 +2,20 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build !goexperiment.swissmap
package runtime
import (
"internal/abi"
"internal/goarch"
"internal/runtime/sys"
"unsafe"
)
func mapaccess1_fast32(t *maptype, h *hmap, key uint32) unsafe.Pointer {
if raceenabled && h != nil {
callerpc := getcallerpc()
callerpc := sys.GetCallerPC()
racereadpc(unsafe.Pointer(h), callerpc, abi.FuncPCABIInternal(mapaccess1_fast32))
}
if h == nil || h.count == 0 {
@@ -41,9 +44,9 @@ func mapaccess1_fast32(t *maptype, h *hmap, key uint32) unsafe.Pointer {
}
}
for ; b != nil; b = b.overflow(t) {
for i, k := uintptr(0), b.keys(); i < abi.MapBucketCount; i, k = i+1, add(k, 4) {
for i, k := uintptr(0), b.keys(); i < abi.OldMapBucketCount; i, k = i+1, add(k, 4) {
if *(*uint32)(k) == key && !isEmpty(b.tophash[i]) {
return add(unsafe.Pointer(b), dataOffset+abi.MapBucketCount*4+i*uintptr(t.ValueSize))
return add(unsafe.Pointer(b), dataOffset+abi.OldMapBucketCount*4+i*uintptr(t.ValueSize))
}
}
}
@@ -61,7 +64,7 @@ func mapaccess1_fast32(t *maptype, h *hmap, key uint32) unsafe.Pointer {
//go:linkname mapaccess2_fast32
func mapaccess2_fast32(t *maptype, h *hmap, key uint32) (unsafe.Pointer, bool) {
if raceenabled && h != nil {
callerpc := getcallerpc()
callerpc := sys.GetCallerPC()
racereadpc(unsafe.Pointer(h), callerpc, abi.FuncPCABIInternal(mapaccess2_fast32))
}
if h == nil || h.count == 0 {
@@ -90,9 +93,9 @@ func mapaccess2_fast32(t *maptype, h *hmap, key uint32) (unsafe.Pointer, bool) {
}
}
for ; b != nil; b = b.overflow(t) {
for i, k := uintptr(0), b.keys(); i < abi.MapBucketCount; i, k = i+1, add(k, 4) {
for i, k := uintptr(0), b.keys(); i < abi.OldMapBucketCount; i, k = i+1, add(k, 4) {
if *(*uint32)(k) == key && !isEmpty(b.tophash[i]) {
return add(unsafe.Pointer(b), dataOffset+abi.MapBucketCount*4+i*uintptr(t.ValueSize)), true
return add(unsafe.Pointer(b), dataOffset+abi.OldMapBucketCount*4+i*uintptr(t.ValueSize)), true
}
}
}
@@ -103,7 +106,6 @@ func mapaccess2_fast32(t *maptype, h *hmap, key uint32) (unsafe.Pointer, bool) {
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
// - github.com/bytedance/sonic
// - github.com/cloudwego/frugal
// - github.com/ugorji/go/codec
//
// Do not remove or change the type signature.
@@ -115,7 +117,7 @@ func mapassign_fast32(t *maptype, h *hmap, key uint32) unsafe.Pointer {
panic(plainError("assignment to entry in nil map"))
}
if raceenabled {
callerpc := getcallerpc()
callerpc := sys.GetCallerPC()
racewritepc(unsafe.Pointer(h), callerpc, abi.FuncPCABIInternal(mapassign_fast32))
}
if h.flags&hashWriting != 0 {
@@ -143,7 +145,7 @@ again:
bucketloop:
for {
for i := uintptr(0); i < abi.MapBucketCount; i++ {
for i := uintptr(0); i < abi.OldMapBucketCount; i++ {
if isEmpty(b.tophash[i]) {
if insertb == nil {
inserti = i
@@ -183,7 +185,7 @@ bucketloop:
insertb = h.newoverflow(t, b)
inserti = 0 // not necessary, but avoids needlessly spilling inserti
}
insertb.tophash[inserti&(abi.MapBucketCount-1)] = tophash(hash) // mask inserti to avoid bounds checks
insertb.tophash[inserti&(abi.OldMapBucketCount-1)] = tophash(hash) // mask inserti to avoid bounds checks
insertk = add(unsafe.Pointer(insertb), dataOffset+inserti*4)
// store new key at insert position
@@ -192,7 +194,7 @@ bucketloop:
h.count++
done:
elem := add(unsafe.Pointer(insertb), dataOffset+abi.MapBucketCount*4+inserti*uintptr(t.ValueSize))
elem := add(unsafe.Pointer(insertb), dataOffset+abi.OldMapBucketCount*4+inserti*uintptr(t.ValueSize))
if h.flags&hashWriting == 0 {
fatal("concurrent map writes")
}
@@ -214,7 +216,7 @@ func mapassign_fast32ptr(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer
panic(plainError("assignment to entry in nil map"))
}
if raceenabled {
callerpc := getcallerpc()
callerpc := sys.GetCallerPC()
racewritepc(unsafe.Pointer(h), callerpc, abi.FuncPCABIInternal(mapassign_fast32))
}
if h.flags&hashWriting != 0 {
@@ -242,7 +244,7 @@ again:
bucketloop:
for {
for i := uintptr(0); i < abi.MapBucketCount; i++ {
for i := uintptr(0); i < abi.OldMapBucketCount; i++ {
if isEmpty(b.tophash[i]) {
if insertb == nil {
inserti = i
@@ -282,7 +284,7 @@ bucketloop:
insertb = h.newoverflow(t, b)
inserti = 0 // not necessary, but avoids needlessly spilling inserti
}
insertb.tophash[inserti&(abi.MapBucketCount-1)] = tophash(hash) // mask inserti to avoid bounds checks
insertb.tophash[inserti&(abi.OldMapBucketCount-1)] = tophash(hash) // mask inserti to avoid bounds checks
insertk = add(unsafe.Pointer(insertb), dataOffset+inserti*4)
// store new key at insert position
@@ -291,7 +293,7 @@ bucketloop:
h.count++
done:
elem := add(unsafe.Pointer(insertb), dataOffset+abi.MapBucketCount*4+inserti*uintptr(t.ValueSize))
elem := add(unsafe.Pointer(insertb), dataOffset+abi.OldMapBucketCount*4+inserti*uintptr(t.ValueSize))
if h.flags&hashWriting == 0 {
fatal("concurrent map writes")
}
@@ -301,7 +303,7 @@ done:
func mapdelete_fast32(t *maptype, h *hmap, key uint32) {
if raceenabled && h != nil {
callerpc := getcallerpc()
callerpc := sys.GetCallerPC()
racewritepc(unsafe.Pointer(h), callerpc, abi.FuncPCABIInternal(mapdelete_fast32))
}
if h == nil || h.count == 0 {
@@ -324,7 +326,7 @@ func mapdelete_fast32(t *maptype, h *hmap, key uint32) {
bOrig := b
search:
for ; b != nil; b = b.overflow(t) {
for i, k := uintptr(0), b.keys(); i < abi.MapBucketCount; i, k = i+1, add(k, 4) {
for i, k := uintptr(0), b.keys(); i < abi.OldMapBucketCount; i, k = i+1, add(k, 4) {
if key != *(*uint32)(k) || isEmpty(b.tophash[i]) {
continue
}
@@ -336,7 +338,7 @@ search:
// 32 bits wide and the key is 32 bits wide also.
*(*unsafe.Pointer)(k) = nil
}
e := add(unsafe.Pointer(b), dataOffset+abi.MapBucketCount*4+i*uintptr(t.ValueSize))
e := add(unsafe.Pointer(b), dataOffset+abi.OldMapBucketCount*4+i*uintptr(t.ValueSize))
if t.Elem.Pointers() {
memclrHasPointers(e, t.Elem.Size_)
} else {
@@ -345,7 +347,7 @@ search:
b.tophash[i] = emptyOne
// If the bucket now ends in a bunch of emptyOne states,
// change those to emptyRest states.
if i == abi.MapBucketCount-1 {
if i == abi.OldMapBucketCount-1 {
if b.overflow(t) != nil && b.overflow(t).tophash[0] != emptyRest {
goto notLast
}
@@ -364,7 +366,7 @@ search:
c := b
for b = bOrig; b.overflow(t) != c; b = b.overflow(t) {
}
i = abi.MapBucketCount - 1
i = abi.OldMapBucketCount - 1
} else {
i--
}
@@ -412,7 +414,7 @@ func evacuate_fast32(t *maptype, h *hmap, oldbucket uintptr) {
x := &xy[0]
x.b = (*bmap)(add(h.buckets, oldbucket*uintptr(t.BucketSize)))
x.k = add(unsafe.Pointer(x.b), dataOffset)
x.e = add(x.k, abi.MapBucketCount*4)
x.e = add(x.k, abi.OldMapBucketCount*4)
if !h.sameSizeGrow() {
// Only calculate y pointers if we're growing bigger.
@@ -420,13 +422,13 @@ func evacuate_fast32(t *maptype, h *hmap, oldbucket uintptr) {
y := &xy[1]
y.b = (*bmap)(add(h.buckets, (oldbucket+newbit)*uintptr(t.BucketSize)))
y.k = add(unsafe.Pointer(y.b), dataOffset)
y.e = add(y.k, abi.MapBucketCount*4)
y.e = add(y.k, abi.OldMapBucketCount*4)
}
for ; b != nil; b = b.overflow(t) {
k := add(unsafe.Pointer(b), dataOffset)
e := add(k, abi.MapBucketCount*4)
for i := 0; i < abi.MapBucketCount; i, k, e = i+1, add(k, 4), add(e, uintptr(t.ValueSize)) {
e := add(k, abi.OldMapBucketCount*4)
for i := 0; i < abi.OldMapBucketCount; i, k, e = i+1, add(k, 4), add(e, uintptr(t.ValueSize)) {
top := b.tophash[i]
if isEmpty(top) {
b.tophash[i] = evacuatedEmpty
@@ -448,13 +450,13 @@ func evacuate_fast32(t *maptype, h *hmap, oldbucket uintptr) {
b.tophash[i] = evacuatedX + useY // evacuatedX + 1 == evacuatedY, enforced in makemap
dst := &xy[useY] // evacuation destination
if dst.i == abi.MapBucketCount {
if dst.i == abi.OldMapBucketCount {
dst.b = h.newoverflow(t, dst.b)
dst.i = 0
dst.k = add(unsafe.Pointer(dst.b), dataOffset)
dst.e = add(dst.k, abi.MapBucketCount*4)
dst.e = add(dst.k, abi.OldMapBucketCount*4)
}
dst.b.tophash[dst.i&(abi.MapBucketCount-1)] = top // mask dst.i as an optimization, to avoid a bounds check
dst.b.tophash[dst.i&(abi.OldMapBucketCount-1)] = top // mask dst.i as an optimization, to avoid a bounds check
// Copy key.
if goarch.PtrSize == 4 && t.Key.Pointers() && writeBarrier.enabled {

View File

@@ -0,0 +1,55 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.swissmap
package runtime
import (
"internal/abi"
"internal/runtime/maps"
"unsafe"
)
// Functions below pushed from internal/runtime/maps.
//go:linkname mapaccess1_fast32
func mapaccess1_fast32(t *abi.SwissMapType, m *maps.Map, key uint32) unsafe.Pointer
// mapaccess2_fast32 should be an internal detail,
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
// - github.com/ugorji/go/codec
//
// Do not remove or change the type signature.
// See go.dev/issue/67401.
//
//go:linkname mapaccess2_fast32
func mapaccess2_fast32(t *abi.SwissMapType, m *maps.Map, key uint32) (unsafe.Pointer, bool)
// mapassign_fast32 should be an internal detail,
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
// - github.com/bytedance/sonic
// - github.com/ugorji/go/codec
//
// Do not remove or change the type signature.
// See go.dev/issue/67401.
//
//go:linkname mapassign_fast32
func mapassign_fast32(t *abi.SwissMapType, m *maps.Map, key uint32) unsafe.Pointer
// mapassign_fast32ptr should be an internal detail,
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
// - github.com/ugorji/go/codec
//
// Do not remove or change the type signature.
// See go.dev/issue/67401.
//
//go:linkname mapassign_fast32ptr
func mapassign_fast32ptr(t *abi.SwissMapType, m *maps.Map, key unsafe.Pointer) unsafe.Pointer
//go:linkname mapdelete_fast32
func mapdelete_fast32(t *abi.SwissMapType, m *maps.Map, key uint32)
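// A minimal sketch of the "push" side these declarations assume (the real
// definitions live in internal/runtime/maps; names and bodies there may
// differ from this illustration): the implementing package defines the
// function and pushes its symbol into package runtime via //go:linkname,
// for example:
//
//	package maps
//
//	//go:linkname runtime_mapaccess1_fast32 runtime.mapaccess1_fast32
//	func runtime_mapaccess1_fast32(typ *abi.SwissMapType, m *Map, key uint32) unsafe.Pointer {
//		// ... actual lookup against the swiss-table layout ...
//	}
//
// The body-less declarations above then only need matching signatures plus
// their own //go:linkname directives for the linker to resolve them.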

View File

@@ -2,17 +2,20 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build !goexperiment.swissmap
package runtime
import (
"internal/abi"
"internal/goarch"
"internal/runtime/sys"
"unsafe"
)
func mapaccess1_fast64(t *maptype, h *hmap, key uint64) unsafe.Pointer {
if raceenabled && h != nil {
callerpc := getcallerpc()
callerpc := sys.GetCallerPC()
racereadpc(unsafe.Pointer(h), callerpc, abi.FuncPCABIInternal(mapaccess1_fast64))
}
if h == nil || h.count == 0 {
@@ -41,9 +44,9 @@ func mapaccess1_fast64(t *maptype, h *hmap, key uint64) unsafe.Pointer {
}
}
for ; b != nil; b = b.overflow(t) {
for i, k := uintptr(0), b.keys(); i < abi.MapBucketCount; i, k = i+1, add(k, 8) {
for i, k := uintptr(0), b.keys(); i < abi.OldMapBucketCount; i, k = i+1, add(k, 8) {
if *(*uint64)(k) == key && !isEmpty(b.tophash[i]) {
return add(unsafe.Pointer(b), dataOffset+abi.MapBucketCount*8+i*uintptr(t.ValueSize))
return add(unsafe.Pointer(b), dataOffset+abi.OldMapBucketCount*8+i*uintptr(t.ValueSize))
}
}
}
@@ -61,7 +64,7 @@ func mapaccess1_fast64(t *maptype, h *hmap, key uint64) unsafe.Pointer {
//go:linkname mapaccess2_fast64
func mapaccess2_fast64(t *maptype, h *hmap, key uint64) (unsafe.Pointer, bool) {
if raceenabled && h != nil {
callerpc := getcallerpc()
callerpc := sys.GetCallerPC()
racereadpc(unsafe.Pointer(h), callerpc, abi.FuncPCABIInternal(mapaccess2_fast64))
}
if h == nil || h.count == 0 {
@@ -90,9 +93,9 @@ func mapaccess2_fast64(t *maptype, h *hmap, key uint64) (unsafe.Pointer, bool) {
}
}
for ; b != nil; b = b.overflow(t) {
for i, k := uintptr(0), b.keys(); i < abi.MapBucketCount; i, k = i+1, add(k, 8) {
for i, k := uintptr(0), b.keys(); i < abi.OldMapBucketCount; i, k = i+1, add(k, 8) {
if *(*uint64)(k) == key && !isEmpty(b.tophash[i]) {
return add(unsafe.Pointer(b), dataOffset+abi.MapBucketCount*8+i*uintptr(t.ValueSize)), true
return add(unsafe.Pointer(b), dataOffset+abi.OldMapBucketCount*8+i*uintptr(t.ValueSize)), true
}
}
}
@@ -103,7 +106,6 @@ func mapaccess2_fast64(t *maptype, h *hmap, key uint64) (unsafe.Pointer, bool) {
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
// - github.com/bytedance/sonic
// - github.com/cloudwego/frugal
// - github.com/ugorji/go/codec
//
// Do not remove or change the type signature.
@@ -115,7 +117,7 @@ func mapassign_fast64(t *maptype, h *hmap, key uint64) unsafe.Pointer {
panic(plainError("assignment to entry in nil map"))
}
if raceenabled {
callerpc := getcallerpc()
callerpc := sys.GetCallerPC()
racewritepc(unsafe.Pointer(h), callerpc, abi.FuncPCABIInternal(mapassign_fast64))
}
if h.flags&hashWriting != 0 {
@@ -143,7 +145,7 @@ again:
bucketloop:
for {
for i := uintptr(0); i < abi.MapBucketCount; i++ {
for i := uintptr(0); i < abi.OldMapBucketCount; i++ {
if isEmpty(b.tophash[i]) {
if insertb == nil {
insertb = b
@@ -183,7 +185,7 @@ bucketloop:
insertb = h.newoverflow(t, b)
inserti = 0 // not necessary, but avoids needlessly spilling inserti
}
insertb.tophash[inserti&(abi.MapBucketCount-1)] = tophash(hash) // mask inserti to avoid bounds checks
insertb.tophash[inserti&(abi.OldMapBucketCount-1)] = tophash(hash) // mask inserti to avoid bounds checks
insertk = add(unsafe.Pointer(insertb), dataOffset+inserti*8)
// store new key at insert position
@@ -192,7 +194,7 @@ bucketloop:
h.count++
done:
elem := add(unsafe.Pointer(insertb), dataOffset+abi.MapBucketCount*8+inserti*uintptr(t.ValueSize))
elem := add(unsafe.Pointer(insertb), dataOffset+abi.OldMapBucketCount*8+inserti*uintptr(t.ValueSize))
if h.flags&hashWriting == 0 {
fatal("concurrent map writes")
}
@@ -204,7 +206,6 @@ done:
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
// - github.com/bytedance/sonic
// - github.com/cloudwego/frugal
// - github.com/ugorji/go/codec
//
// Do not remove or change the type signature.
@@ -216,7 +217,7 @@ func mapassign_fast64ptr(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer
panic(plainError("assignment to entry in nil map"))
}
if raceenabled {
callerpc := getcallerpc()
callerpc := sys.GetCallerPC()
racewritepc(unsafe.Pointer(h), callerpc, abi.FuncPCABIInternal(mapassign_fast64))
}
if h.flags&hashWriting != 0 {
@@ -244,7 +245,7 @@ again:
bucketloop:
for {
for i := uintptr(0); i < abi.MapBucketCount; i++ {
for i := uintptr(0); i < abi.OldMapBucketCount; i++ {
if isEmpty(b.tophash[i]) {
if insertb == nil {
insertb = b
@@ -284,7 +285,7 @@ bucketloop:
insertb = h.newoverflow(t, b)
inserti = 0 // not necessary, but avoids needlessly spilling inserti
}
insertb.tophash[inserti&(abi.MapBucketCount-1)] = tophash(hash) // mask inserti to avoid bounds checks
insertb.tophash[inserti&(abi.OldMapBucketCount-1)] = tophash(hash) // mask inserti to avoid bounds checks
insertk = add(unsafe.Pointer(insertb), dataOffset+inserti*8)
// store new key at insert position
@@ -293,7 +294,7 @@ bucketloop:
h.count++
done:
elem := add(unsafe.Pointer(insertb), dataOffset+abi.MapBucketCount*8+inserti*uintptr(t.ValueSize))
elem := add(unsafe.Pointer(insertb), dataOffset+abi.OldMapBucketCount*8+inserti*uintptr(t.ValueSize))
if h.flags&hashWriting == 0 {
fatal("concurrent map writes")
}
@@ -303,7 +304,7 @@ done:
func mapdelete_fast64(t *maptype, h *hmap, key uint64) {
if raceenabled && h != nil {
callerpc := getcallerpc()
callerpc := sys.GetCallerPC()
racewritepc(unsafe.Pointer(h), callerpc, abi.FuncPCABIInternal(mapdelete_fast64))
}
if h == nil || h.count == 0 {
@@ -326,7 +327,7 @@ func mapdelete_fast64(t *maptype, h *hmap, key uint64) {
bOrig := b
search:
for ; b != nil; b = b.overflow(t) {
for i, k := uintptr(0), b.keys(); i < abi.MapBucketCount; i, k = i+1, add(k, 8) {
for i, k := uintptr(0), b.keys(); i < abi.OldMapBucketCount; i, k = i+1, add(k, 8) {
if key != *(*uint64)(k) || isEmpty(b.tophash[i]) {
continue
}
@@ -340,7 +341,7 @@ search:
memclrHasPointers(k, 8)
}
}
e := add(unsafe.Pointer(b), dataOffset+abi.MapBucketCount*8+i*uintptr(t.ValueSize))
e := add(unsafe.Pointer(b), dataOffset+abi.OldMapBucketCount*8+i*uintptr(t.ValueSize))
if t.Elem.Pointers() {
memclrHasPointers(e, t.Elem.Size_)
} else {
@@ -349,7 +350,7 @@ search:
b.tophash[i] = emptyOne
// If the bucket now ends in a bunch of emptyOne states,
// change those to emptyRest states.
if i == abi.MapBucketCount-1 {
if i == abi.OldMapBucketCount-1 {
if b.overflow(t) != nil && b.overflow(t).tophash[0] != emptyRest {
goto notLast
}
@@ -368,7 +369,7 @@ search:
c := b
for b = bOrig; b.overflow(t) != c; b = b.overflow(t) {
}
i = abi.MapBucketCount - 1
i = abi.OldMapBucketCount - 1
} else {
i--
}
@@ -416,7 +417,7 @@ func evacuate_fast64(t *maptype, h *hmap, oldbucket uintptr) {
x := &xy[0]
x.b = (*bmap)(add(h.buckets, oldbucket*uintptr(t.BucketSize)))
x.k = add(unsafe.Pointer(x.b), dataOffset)
x.e = add(x.k, abi.MapBucketCount*8)
x.e = add(x.k, abi.OldMapBucketCount*8)
if !h.sameSizeGrow() {
// Only calculate y pointers if we're growing bigger.
@@ -424,13 +425,13 @@ func evacuate_fast64(t *maptype, h *hmap, oldbucket uintptr) {
y := &xy[1]
y.b = (*bmap)(add(h.buckets, (oldbucket+newbit)*uintptr(t.BucketSize)))
y.k = add(unsafe.Pointer(y.b), dataOffset)
y.e = add(y.k, abi.MapBucketCount*8)
y.e = add(y.k, abi.OldMapBucketCount*8)
}
for ; b != nil; b = b.overflow(t) {
k := add(unsafe.Pointer(b), dataOffset)
e := add(k, abi.MapBucketCount*8)
for i := 0; i < abi.MapBucketCount; i, k, e = i+1, add(k, 8), add(e, uintptr(t.ValueSize)) {
e := add(k, abi.OldMapBucketCount*8)
for i := 0; i < abi.OldMapBucketCount; i, k, e = i+1, add(k, 8), add(e, uintptr(t.ValueSize)) {
top := b.tophash[i]
if isEmpty(top) {
b.tophash[i] = evacuatedEmpty
@@ -452,13 +453,13 @@ func evacuate_fast64(t *maptype, h *hmap, oldbucket uintptr) {
b.tophash[i] = evacuatedX + useY // evacuatedX + 1 == evacuatedY, enforced in makemap
dst := &xy[useY] // evacuation destination
if dst.i == abi.MapBucketCount {
if dst.i == abi.OldMapBucketCount {
dst.b = h.newoverflow(t, dst.b)
dst.i = 0
dst.k = add(unsafe.Pointer(dst.b), dataOffset)
dst.e = add(dst.k, abi.MapBucketCount*8)
dst.e = add(dst.k, abi.OldMapBucketCount*8)
}
dst.b.tophash[dst.i&(abi.MapBucketCount-1)] = top // mask dst.i as an optimization, to avoid a bounds check
dst.b.tophash[dst.i&(abi.OldMapBucketCount-1)] = top // mask dst.i as an optimization, to avoid a bounds check
// Copy key.
if t.Key.Pointers() && writeBarrier.enabled {

View File

@@ -0,0 +1,56 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.swissmap
package runtime
import (
"internal/abi"
"internal/runtime/maps"
"unsafe"
)
// Functions below pushed from internal/runtime/maps.
//go:linkname mapaccess1_fast64
func mapaccess1_fast64(t *abi.SwissMapType, m *maps.Map, key uint64) unsafe.Pointer
// mapaccess2_fast64 should be an internal detail,
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
// - github.com/ugorji/go/codec
//
// Do not remove or change the type signature.
// See go.dev/issue/67401.
//
//go:linkname mapaccess2_fast64
func mapaccess2_fast64(t *abi.SwissMapType, m *maps.Map, key uint64) (unsafe.Pointer, bool)
// mapassign_fast64 should be an internal detail,
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
// - github.com/bytedance/sonic
// - github.com/ugorji/go/codec
//
// Do not remove or change the type signature.
// See go.dev/issue/67401.
//
//go:linkname mapassign_fast64
func mapassign_fast64(t *abi.SwissMapType, m *maps.Map, key uint64) unsafe.Pointer
// mapassign_fast64ptr should be an internal detail,
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
// - github.com/bytedance/sonic
// - github.com/ugorji/go/codec
//
// Do not remove or change the type signature.
// See go.dev/issue/67401.
//
//go:linkname mapassign_fast64ptr
func mapassign_fast64ptr(t *abi.SwissMapType, m *maps.Map, key unsafe.Pointer) unsafe.Pointer
//go:linkname mapdelete_fast64
func mapdelete_fast64(t *abi.SwissMapType, m *maps.Map, key uint64)

View File

@@ -2,17 +2,20 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build !goexperiment.swissmap
package runtime
import (
"internal/abi"
"internal/goarch"
"internal/runtime/sys"
"unsafe"
)
func mapaccess1_faststr(t *maptype, h *hmap, ky string) unsafe.Pointer {
if raceenabled && h != nil {
callerpc := getcallerpc()
callerpc := sys.GetCallerPC()
racereadpc(unsafe.Pointer(h), callerpc, abi.FuncPCABIInternal(mapaccess1_faststr))
}
if h == nil || h.count == 0 {
@@ -27,7 +30,7 @@ func mapaccess1_faststr(t *maptype, h *hmap, ky string) unsafe.Pointer {
b := (*bmap)(h.buckets)
if key.len < 32 {
// short key, doing lots of comparisons is ok
for i, kptr := uintptr(0), b.keys(); i < abi.MapBucketCount; i, kptr = i+1, add(kptr, 2*goarch.PtrSize) {
for i, kptr := uintptr(0), b.keys(); i < abi.OldMapBucketCount; i, kptr = i+1, add(kptr, 2*goarch.PtrSize) {
k := (*stringStruct)(kptr)
if k.len != key.len || isEmpty(b.tophash[i]) {
if b.tophash[i] == emptyRest {
@@ -36,14 +39,14 @@ func mapaccess1_faststr(t *maptype, h *hmap, ky string) unsafe.Pointer {
continue
}
if k.str == key.str || memequal(k.str, key.str, uintptr(key.len)) {
return add(unsafe.Pointer(b), dataOffset+abi.MapBucketCount*2*goarch.PtrSize+i*uintptr(t.ValueSize))
return add(unsafe.Pointer(b), dataOffset+abi.OldMapBucketCount*2*goarch.PtrSize+i*uintptr(t.ValueSize))
}
}
return unsafe.Pointer(&zeroVal[0])
}
// long key, try not to do more comparisons than necessary
keymaybe := uintptr(abi.MapBucketCount)
for i, kptr := uintptr(0), b.keys(); i < abi.MapBucketCount; i, kptr = i+1, add(kptr, 2*goarch.PtrSize) {
keymaybe := uintptr(abi.OldMapBucketCount)
for i, kptr := uintptr(0), b.keys(); i < abi.OldMapBucketCount; i, kptr = i+1, add(kptr, 2*goarch.PtrSize) {
k := (*stringStruct)(kptr)
if k.len != key.len || isEmpty(b.tophash[i]) {
if b.tophash[i] == emptyRest {
@@ -52,7 +55,7 @@ func mapaccess1_faststr(t *maptype, h *hmap, ky string) unsafe.Pointer {
continue
}
if k.str == key.str {
return add(unsafe.Pointer(b), dataOffset+abi.MapBucketCount*2*goarch.PtrSize+i*uintptr(t.ValueSize))
return add(unsafe.Pointer(b), dataOffset+abi.OldMapBucketCount*2*goarch.PtrSize+i*uintptr(t.ValueSize))
}
// check first 4 bytes
if *((*[4]byte)(key.str)) != *((*[4]byte)(k.str)) {
@@ -62,16 +65,16 @@ func mapaccess1_faststr(t *maptype, h *hmap, ky string) unsafe.Pointer {
if *((*[4]byte)(add(key.str, uintptr(key.len)-4))) != *((*[4]byte)(add(k.str, uintptr(key.len)-4))) {
continue
}
if keymaybe != abi.MapBucketCount {
if keymaybe != abi.OldMapBucketCount {
// Two keys are potential matches. Use hash to distinguish them.
goto dohash
}
keymaybe = i
}
if keymaybe != abi.MapBucketCount {
if keymaybe != abi.OldMapBucketCount {
k := (*stringStruct)(add(unsafe.Pointer(b), dataOffset+keymaybe*2*goarch.PtrSize))
if memequal(k.str, key.str, uintptr(key.len)) {
return add(unsafe.Pointer(b), dataOffset+abi.MapBucketCount*2*goarch.PtrSize+keymaybe*uintptr(t.ValueSize))
return add(unsafe.Pointer(b), dataOffset+abi.OldMapBucketCount*2*goarch.PtrSize+keymaybe*uintptr(t.ValueSize))
}
}
return unsafe.Pointer(&zeroVal[0])
@@ -92,13 +95,13 @@ dohash:
}
top := tophash(hash)
for ; b != nil; b = b.overflow(t) {
for i, kptr := uintptr(0), b.keys(); i < abi.MapBucketCount; i, kptr = i+1, add(kptr, 2*goarch.PtrSize) {
for i, kptr := uintptr(0), b.keys(); i < abi.OldMapBucketCount; i, kptr = i+1, add(kptr, 2*goarch.PtrSize) {
k := (*stringStruct)(kptr)
if k.len != key.len || b.tophash[i] != top {
continue
}
if k.str == key.str || memequal(k.str, key.str, uintptr(key.len)) {
return add(unsafe.Pointer(b), dataOffset+abi.MapBucketCount*2*goarch.PtrSize+i*uintptr(t.ValueSize))
return add(unsafe.Pointer(b), dataOffset+abi.OldMapBucketCount*2*goarch.PtrSize+i*uintptr(t.ValueSize))
}
}
}
@@ -116,7 +119,7 @@ dohash:
//go:linkname mapaccess2_faststr
func mapaccess2_faststr(t *maptype, h *hmap, ky string) (unsafe.Pointer, bool) {
if raceenabled && h != nil {
callerpc := getcallerpc()
callerpc := sys.GetCallerPC()
racereadpc(unsafe.Pointer(h), callerpc, abi.FuncPCABIInternal(mapaccess2_faststr))
}
if h == nil || h.count == 0 {
@@ -131,7 +134,7 @@ func mapaccess2_faststr(t *maptype, h *hmap, ky string) (unsafe.Pointer, bool) {
b := (*bmap)(h.buckets)
if key.len < 32 {
// short key, doing lots of comparisons is ok
for i, kptr := uintptr(0), b.keys(); i < abi.MapBucketCount; i, kptr = i+1, add(kptr, 2*goarch.PtrSize) {
for i, kptr := uintptr(0), b.keys(); i < abi.OldMapBucketCount; i, kptr = i+1, add(kptr, 2*goarch.PtrSize) {
k := (*stringStruct)(kptr)
if k.len != key.len || isEmpty(b.tophash[i]) {
if b.tophash[i] == emptyRest {
@@ -140,14 +143,14 @@ func mapaccess2_faststr(t *maptype, h *hmap, ky string) (unsafe.Pointer, bool) {
continue
}
if k.str == key.str || memequal(k.str, key.str, uintptr(key.len)) {
return add(unsafe.Pointer(b), dataOffset+abi.MapBucketCount*2*goarch.PtrSize+i*uintptr(t.ValueSize)), true
return add(unsafe.Pointer(b), dataOffset+abi.OldMapBucketCount*2*goarch.PtrSize+i*uintptr(t.ValueSize)), true
}
}
return unsafe.Pointer(&zeroVal[0]), false
}
// long key, try not to do more comparisons than necessary
keymaybe := uintptr(abi.MapBucketCount)
for i, kptr := uintptr(0), b.keys(); i < abi.MapBucketCount; i, kptr = i+1, add(kptr, 2*goarch.PtrSize) {
keymaybe := uintptr(abi.OldMapBucketCount)
for i, kptr := uintptr(0), b.keys(); i < abi.OldMapBucketCount; i, kptr = i+1, add(kptr, 2*goarch.PtrSize) {
k := (*stringStruct)(kptr)
if k.len != key.len || isEmpty(b.tophash[i]) {
if b.tophash[i] == emptyRest {
@@ -156,7 +159,7 @@ func mapaccess2_faststr(t *maptype, h *hmap, ky string) (unsafe.Pointer, bool) {
continue
}
if k.str == key.str {
return add(unsafe.Pointer(b), dataOffset+abi.MapBucketCount*2*goarch.PtrSize+i*uintptr(t.ValueSize)), true
return add(unsafe.Pointer(b), dataOffset+abi.OldMapBucketCount*2*goarch.PtrSize+i*uintptr(t.ValueSize)), true
}
// check first 4 bytes
if *((*[4]byte)(key.str)) != *((*[4]byte)(k.str)) {
@@ -166,16 +169,16 @@ func mapaccess2_faststr(t *maptype, h *hmap, ky string) (unsafe.Pointer, bool) {
if *((*[4]byte)(add(key.str, uintptr(key.len)-4))) != *((*[4]byte)(add(k.str, uintptr(key.len)-4))) {
continue
}
if keymaybe != abi.MapBucketCount {
if keymaybe != abi.OldMapBucketCount {
// Two keys are potential matches. Use hash to distinguish them.
goto dohash
}
keymaybe = i
}
if keymaybe != abi.MapBucketCount {
if keymaybe != abi.OldMapBucketCount {
k := (*stringStruct)(add(unsafe.Pointer(b), dataOffset+keymaybe*2*goarch.PtrSize))
if memequal(k.str, key.str, uintptr(key.len)) {
return add(unsafe.Pointer(b), dataOffset+abi.MapBucketCount*2*goarch.PtrSize+keymaybe*uintptr(t.ValueSize)), true
return add(unsafe.Pointer(b), dataOffset+abi.OldMapBucketCount*2*goarch.PtrSize+keymaybe*uintptr(t.ValueSize)), true
}
}
return unsafe.Pointer(&zeroVal[0]), false
@@ -196,13 +199,13 @@ dohash:
}
top := tophash(hash)
for ; b != nil; b = b.overflow(t) {
for i, kptr := uintptr(0), b.keys(); i < abi.MapBucketCount; i, kptr = i+1, add(kptr, 2*goarch.PtrSize) {
for i, kptr := uintptr(0), b.keys(); i < abi.OldMapBucketCount; i, kptr = i+1, add(kptr, 2*goarch.PtrSize) {
k := (*stringStruct)(kptr)
if k.len != key.len || b.tophash[i] != top {
continue
}
if k.str == key.str || memequal(k.str, key.str, uintptr(key.len)) {
return add(unsafe.Pointer(b), dataOffset+abi.MapBucketCount*2*goarch.PtrSize+i*uintptr(t.ValueSize)), true
return add(unsafe.Pointer(b), dataOffset+abi.OldMapBucketCount*2*goarch.PtrSize+i*uintptr(t.ValueSize)), true
}
}
}
@@ -213,7 +216,6 @@ dohash:
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
// - github.com/bytedance/sonic
// - github.com/cloudwego/frugal
// - github.com/ugorji/go/codec
//
// Do not remove or change the type signature.
@@ -225,7 +227,7 @@ func mapassign_faststr(t *maptype, h *hmap, s string) unsafe.Pointer {
panic(plainError("assignment to entry in nil map"))
}
if raceenabled {
callerpc := getcallerpc()
callerpc := sys.GetCallerPC()
racewritepc(unsafe.Pointer(h), callerpc, abi.FuncPCABIInternal(mapassign_faststr))
}
if h.flags&hashWriting != 0 {
@@ -255,7 +257,7 @@ again:
bucketloop:
for {
for i := uintptr(0); i < abi.MapBucketCount; i++ {
for i := uintptr(0); i < abi.OldMapBucketCount; i++ {
if b.tophash[i] != top {
if isEmpty(b.tophash[i]) && insertb == nil {
insertb = b
@@ -302,7 +304,7 @@ bucketloop:
insertb = h.newoverflow(t, b)
inserti = 0 // not necessary, but avoids needlessly spilling inserti
}
insertb.tophash[inserti&(abi.MapBucketCount-1)] = top // mask inserti to avoid bounds checks
insertb.tophash[inserti&(abi.OldMapBucketCount-1)] = top // mask inserti to avoid bounds checks
insertk = add(unsafe.Pointer(insertb), dataOffset+inserti*2*goarch.PtrSize)
// store new key at insert position
@@ -310,7 +312,7 @@ bucketloop:
h.count++
done:
elem := add(unsafe.Pointer(insertb), dataOffset+abi.MapBucketCount*2*goarch.PtrSize+inserti*uintptr(t.ValueSize))
elem := add(unsafe.Pointer(insertb), dataOffset+abi.OldMapBucketCount*2*goarch.PtrSize+inserti*uintptr(t.ValueSize))
if h.flags&hashWriting == 0 {
fatal("concurrent map writes")
}
@@ -320,7 +322,7 @@ done:
func mapdelete_faststr(t *maptype, h *hmap, ky string) {
if raceenabled && h != nil {
callerpc := getcallerpc()
callerpc := sys.GetCallerPC()
racewritepc(unsafe.Pointer(h), callerpc, abi.FuncPCABIInternal(mapdelete_faststr))
}
if h == nil || h.count == 0 {
@@ -345,7 +347,7 @@ func mapdelete_faststr(t *maptype, h *hmap, ky string) {
top := tophash(hash)
search:
for ; b != nil; b = b.overflow(t) {
for i, kptr := uintptr(0), b.keys(); i < abi.MapBucketCount; i, kptr = i+1, add(kptr, 2*goarch.PtrSize) {
for i, kptr := uintptr(0), b.keys(); i < abi.OldMapBucketCount; i, kptr = i+1, add(kptr, 2*goarch.PtrSize) {
k := (*stringStruct)(kptr)
if k.len != key.len || b.tophash[i] != top {
continue
@@ -355,7 +357,7 @@ search:
}
// Clear key's pointer.
k.str = nil
e := add(unsafe.Pointer(b), dataOffset+abi.MapBucketCount*2*goarch.PtrSize+i*uintptr(t.ValueSize))
e := add(unsafe.Pointer(b), dataOffset+abi.OldMapBucketCount*2*goarch.PtrSize+i*uintptr(t.ValueSize))
if t.Elem.Pointers() {
memclrHasPointers(e, t.Elem.Size_)
} else {
@@ -364,7 +366,7 @@ search:
b.tophash[i] = emptyOne
// If the bucket now ends in a bunch of emptyOne states,
// change those to emptyRest states.
if i == abi.MapBucketCount-1 {
if i == abi.OldMapBucketCount-1 {
if b.overflow(t) != nil && b.overflow(t).tophash[0] != emptyRest {
goto notLast
}
@@ -383,7 +385,7 @@ search:
c := b
for b = bOrig; b.overflow(t) != c; b = b.overflow(t) {
}
i = abi.MapBucketCount - 1
i = abi.OldMapBucketCount - 1
} else {
i--
}
@@ -431,7 +433,7 @@ func evacuate_faststr(t *maptype, h *hmap, oldbucket uintptr) {
x := &xy[0]
x.b = (*bmap)(add(h.buckets, oldbucket*uintptr(t.BucketSize)))
x.k = add(unsafe.Pointer(x.b), dataOffset)
x.e = add(x.k, abi.MapBucketCount*2*goarch.PtrSize)
x.e = add(x.k, abi.OldMapBucketCount*2*goarch.PtrSize)
if !h.sameSizeGrow() {
// Only calculate y pointers if we're growing bigger.
@@ -439,13 +441,13 @@ func evacuate_faststr(t *maptype, h *hmap, oldbucket uintptr) {
y := &xy[1]
y.b = (*bmap)(add(h.buckets, (oldbucket+newbit)*uintptr(t.BucketSize)))
y.k = add(unsafe.Pointer(y.b), dataOffset)
y.e = add(y.k, abi.MapBucketCount*2*goarch.PtrSize)
y.e = add(y.k, abi.OldMapBucketCount*2*goarch.PtrSize)
}
for ; b != nil; b = b.overflow(t) {
k := add(unsafe.Pointer(b), dataOffset)
e := add(k, abi.MapBucketCount*2*goarch.PtrSize)
for i := 0; i < abi.MapBucketCount; i, k, e = i+1, add(k, 2*goarch.PtrSize), add(e, uintptr(t.ValueSize)) {
e := add(k, abi.OldMapBucketCount*2*goarch.PtrSize)
for i := 0; i < abi.OldMapBucketCount; i, k, e = i+1, add(k, 2*goarch.PtrSize), add(e, uintptr(t.ValueSize)) {
top := b.tophash[i]
if isEmpty(top) {
b.tophash[i] = evacuatedEmpty
@@ -467,13 +469,13 @@ func evacuate_faststr(t *maptype, h *hmap, oldbucket uintptr) {
b.tophash[i] = evacuatedX + useY // evacuatedX + 1 == evacuatedY, enforced in makemap
dst := &xy[useY] // evacuation destination
if dst.i == abi.MapBucketCount {
if dst.i == abi.OldMapBucketCount {
dst.b = h.newoverflow(t, dst.b)
dst.i = 0
dst.k = add(unsafe.Pointer(dst.b), dataOffset)
dst.e = add(dst.k, abi.MapBucketCount*2*goarch.PtrSize)
dst.e = add(dst.k, abi.OldMapBucketCount*2*goarch.PtrSize)
}
dst.b.tophash[dst.i&(abi.MapBucketCount-1)] = top // mask dst.i as an optimization, to avoid a bounds check
dst.b.tophash[dst.i&(abi.OldMapBucketCount-1)] = top // mask dst.i as an optimization, to avoid a bounds check
// Copy key.
*(*string)(dst.k) = *(*string)(k)

View File

@@ -0,0 +1,44 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.swissmap
package runtime
import (
"internal/abi"
"internal/runtime/maps"
"unsafe"
)
// Functions below pushed from internal/runtime/maps.
//go:linkname mapaccess1_faststr
func mapaccess1_faststr(t *abi.SwissMapType, m *maps.Map, ky string) unsafe.Pointer
// mapaccess2_faststr should be an internal detail,
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
// - github.com/ugorji/go/codec
//
// Do not remove or change the type signature.
// See go.dev/issue/67401.
//
//go:linkname mapaccess2_faststr
func mapaccess2_faststr(t *abi.SwissMapType, m *maps.Map, ky string) (unsafe.Pointer, bool)
// mapassign_faststr should be an internal detail,
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
// - github.com/bytedance/sonic
// - github.com/ugorji/go/codec
//
// Do not remove or change the type signature.
// See go.dev/issue/67401.
//
//go:linkname mapassign_faststr
func mapassign_faststr(t *abi.SwissMapType, m *maps.Map, s string) unsafe.Pointer
//go:linkname mapdelete_faststr
func mapdelete_faststr(t *abi.SwissMapType, m *maps.Map, ky string)

View File

@@ -2,6 +2,8 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build !goexperiment.swissmap
package runtime
// This file contains the implementation of Go's map type.
@@ -57,19 +59,22 @@ import (
"internal/abi"
"internal/goarch"
"internal/runtime/atomic"
"runtime/internal/math"
"internal/runtime/math"
"internal/runtime/sys"
"unsafe"
)
type maptype = abi.OldMapType
const (
// Maximum number of key/elem pairs a bucket can hold.
bucketCntBits = abi.MapBucketCountBits
bucketCntBits = abi.OldMapBucketCountBits
// Maximum average load of a bucket that triggers growth is bucketCnt*13/16 (about 80% full)
// Because of minimum alignment rules, bucketCnt is known to be at least 8.
// Represent as loadFactorNum/loadFactorDen, to allow integer math.
loadFactorDen = 2
loadFactorNum = loadFactorDen * abi.MapBucketCount * 13 / 16
loadFactorNum = loadFactorDen * abi.OldMapBucketCount * 13 / 16
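// For example, with abi.OldMapBucketCount = 8 these evaluate to
// loadFactorNum = 13 and loadFactorDen = 2, so growth is triggered once
// count > 6.5 * 2^B, i.e. an average of 6.5 of the 8 slots per bucket
// (about 81%) are in use.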
// data offset should be the size of the bmap struct, but needs to be
// aligned correctly. For amd64p32 this means 64-bit alignment
@@ -118,6 +123,7 @@ type hmap struct {
buckets unsafe.Pointer // array of 2^B Buckets. may be nil if count==0.
oldbuckets unsafe.Pointer // previous bucket array of half the size, non-nil only when growing
nevacuate uintptr // progress counter for evacuation (buckets less than this have been evacuated)
clearSeq uint64
extra *mapextra // optional fields
}
@@ -144,7 +150,7 @@ type bmap struct {
// tophash generally contains the top byte of the hash value
// for each key in this bucket. If tophash[0] < minTopHash,
// tophash[0] is a bucket evacuation state instead.
tophash [abi.MapBucketCount]uint8
tophash [abi.OldMapBucketCount]uint8
// Followed by bucketCnt keys and then bucketCnt elems.
// NOTE: packing all the keys together and then all the elems together makes the
// code a bit more complicated than alternating key/elem/key/elem/... but it allows
@@ -171,6 +177,7 @@ type hiter struct {
i uint8
bucket uintptr
checkBucket uintptr
clearSeq uint64
}
// bucketShift returns 1<<b, optimized for code generation.
@@ -308,7 +315,6 @@ func makemap_small() *hmap {
// makemap should be an internal detail,
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
// - github.com/cloudwego/frugal
// - github.com/ugorji/go/codec
//
// Do not remove or change the type signature.
@@ -408,7 +414,7 @@ func makeBucketArray(t *maptype, b uint8, dirtyalloc unsafe.Pointer) (buckets un
// hold onto it for very long.
func mapaccess1(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer {
if raceenabled && h != nil {
callerpc := getcallerpc()
callerpc := sys.GetCallerPC()
pc := abi.FuncPCABIInternal(mapaccess1)
racereadpc(unsafe.Pointer(h), callerpc, pc)
raceReadObjectPC(t.Key, key, callerpc, pc)
@@ -444,7 +450,7 @@ func mapaccess1(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer {
top := tophash(hash)
bucketloop:
for ; b != nil; b = b.overflow(t) {
for i := uintptr(0); i < abi.MapBucketCount; i++ {
for i := uintptr(0); i < abi.OldMapBucketCount; i++ {
if b.tophash[i] != top {
if b.tophash[i] == emptyRest {
break bucketloop
@@ -456,7 +462,7 @@ bucketloop:
k = *((*unsafe.Pointer)(k))
}
if t.Key.Equal(key, k) {
e := add(unsafe.Pointer(b), dataOffset+abi.MapBucketCount*uintptr(t.KeySize)+i*uintptr(t.ValueSize))
e := add(unsafe.Pointer(b), dataOffset+abi.OldMapBucketCount*uintptr(t.KeySize)+i*uintptr(t.ValueSize))
if t.IndirectElem() {
e = *((*unsafe.Pointer)(e))
}
@@ -478,7 +484,7 @@ bucketloop:
//go:linkname mapaccess2
func mapaccess2(t *maptype, h *hmap, key unsafe.Pointer) (unsafe.Pointer, bool) {
if raceenabled && h != nil {
callerpc := getcallerpc()
callerpc := sys.GetCallerPC()
pc := abi.FuncPCABIInternal(mapaccess2)
racereadpc(unsafe.Pointer(h), callerpc, pc)
raceReadObjectPC(t.Key, key, callerpc, pc)
@@ -514,7 +520,7 @@ func mapaccess2(t *maptype, h *hmap, key unsafe.Pointer) (unsafe.Pointer, bool)
top := tophash(hash)
bucketloop:
for ; b != nil; b = b.overflow(t) {
for i := uintptr(0); i < abi.MapBucketCount; i++ {
for i := uintptr(0); i < abi.OldMapBucketCount; i++ {
if b.tophash[i] != top {
if b.tophash[i] == emptyRest {
break bucketloop
@@ -526,7 +532,7 @@ bucketloop:
k = *((*unsafe.Pointer)(k))
}
if t.Key.Equal(key, k) {
e := add(unsafe.Pointer(b), dataOffset+abi.MapBucketCount*uintptr(t.KeySize)+i*uintptr(t.ValueSize))
e := add(unsafe.Pointer(b), dataOffset+abi.OldMapBucketCount*uintptr(t.KeySize)+i*uintptr(t.ValueSize))
if t.IndirectElem() {
e = *((*unsafe.Pointer)(e))
}
@@ -558,7 +564,7 @@ func mapaccessK(t *maptype, h *hmap, key unsafe.Pointer) (unsafe.Pointer, unsafe
top := tophash(hash)
bucketloop:
for ; b != nil; b = b.overflow(t) {
for i := uintptr(0); i < abi.MapBucketCount; i++ {
for i := uintptr(0); i < abi.OldMapBucketCount; i++ {
if b.tophash[i] != top {
if b.tophash[i] == emptyRest {
break bucketloop
@@ -570,7 +576,7 @@ bucketloop:
k = *((*unsafe.Pointer)(k))
}
if t.Key.Equal(key, k) {
e := add(unsafe.Pointer(b), dataOffset+abi.MapBucketCount*uintptr(t.KeySize)+i*uintptr(t.ValueSize))
e := add(unsafe.Pointer(b), dataOffset+abi.OldMapBucketCount*uintptr(t.KeySize)+i*uintptr(t.ValueSize))
if t.IndirectElem() {
e = *((*unsafe.Pointer)(e))
}
@@ -603,7 +609,6 @@ func mapaccess2_fat(t *maptype, h *hmap, key, zero unsafe.Pointer) (unsafe.Point
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
// - github.com/bytedance/sonic
// - github.com/cloudwego/frugal
// - github.com/RomiChan/protobuf
// - github.com/segmentio/encoding
// - github.com/ugorji/go/codec
@@ -617,7 +622,7 @@ func mapassign(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer {
panic(plainError("assignment to entry in nil map"))
}
if raceenabled {
callerpc := getcallerpc()
callerpc := sys.GetCallerPC()
pc := abi.FuncPCABIInternal(mapassign)
racewritepc(unsafe.Pointer(h), callerpc, pc)
raceReadObjectPC(t.Key, key, callerpc, pc)
@@ -654,12 +659,12 @@ again:
var elem unsafe.Pointer
bucketloop:
for {
for i := uintptr(0); i < abi.MapBucketCount; i++ {
for i := uintptr(0); i < abi.OldMapBucketCount; i++ {
if b.tophash[i] != top {
if isEmpty(b.tophash[i]) && inserti == nil {
inserti = &b.tophash[i]
insertk = add(unsafe.Pointer(b), dataOffset+i*uintptr(t.KeySize))
elem = add(unsafe.Pointer(b), dataOffset+abi.MapBucketCount*uintptr(t.KeySize)+i*uintptr(t.ValueSize))
elem = add(unsafe.Pointer(b), dataOffset+abi.OldMapBucketCount*uintptr(t.KeySize)+i*uintptr(t.ValueSize))
}
if b.tophash[i] == emptyRest {
break bucketloop
@@ -677,7 +682,7 @@ bucketloop:
if t.NeedKeyUpdate() {
typedmemmove(t.Key, k, key)
}
elem = add(unsafe.Pointer(b), dataOffset+abi.MapBucketCount*uintptr(t.KeySize)+i*uintptr(t.ValueSize))
elem = add(unsafe.Pointer(b), dataOffset+abi.OldMapBucketCount*uintptr(t.KeySize)+i*uintptr(t.ValueSize))
goto done
}
ovf := b.overflow(t)
@@ -701,7 +706,7 @@ bucketloop:
newb := h.newoverflow(t, b)
inserti = &newb.tophash[0]
insertk = add(unsafe.Pointer(newb), dataOffset)
elem = add(insertk, abi.MapBucketCount*uintptr(t.KeySize))
elem = add(insertk, abi.OldMapBucketCount*uintptr(t.KeySize))
}
// store new key/elem at insert position
@@ -740,7 +745,7 @@ done:
//go:linkname mapdelete
func mapdelete(t *maptype, h *hmap, key unsafe.Pointer) {
if raceenabled && h != nil {
callerpc := getcallerpc()
callerpc := sys.GetCallerPC()
pc := abi.FuncPCABIInternal(mapdelete)
racewritepc(unsafe.Pointer(h), callerpc, pc)
raceReadObjectPC(t.Key, key, callerpc, pc)
@@ -776,7 +781,7 @@ func mapdelete(t *maptype, h *hmap, key unsafe.Pointer) {
top := tophash(hash)
search:
for ; b != nil; b = b.overflow(t) {
for i := uintptr(0); i < abi.MapBucketCount; i++ {
for i := uintptr(0); i < abi.OldMapBucketCount; i++ {
if b.tophash[i] != top {
if b.tophash[i] == emptyRest {
break search
@@ -797,7 +802,7 @@ search:
} else if t.Key.Pointers() {
memclrHasPointers(k, t.Key.Size_)
}
e := add(unsafe.Pointer(b), dataOffset+abi.MapBucketCount*uintptr(t.KeySize)+i*uintptr(t.ValueSize))
e := add(unsafe.Pointer(b), dataOffset+abi.OldMapBucketCount*uintptr(t.KeySize)+i*uintptr(t.ValueSize))
if t.IndirectElem() {
*(*unsafe.Pointer)(e) = nil
} else if t.Elem.Pointers() {
@@ -810,7 +815,7 @@ search:
// change those to emptyRest states.
// It would be nice to make this a separate function, but
// for loops are not currently inlineable.
if i == abi.MapBucketCount-1 {
if i == abi.OldMapBucketCount-1 {
if b.overflow(t) != nil && b.overflow(t).tophash[0] != emptyRest {
goto notLast
}
@@ -829,7 +834,7 @@ search:
c := b
for b = bOrig; b.overflow(t) != c; b = b.overflow(t) {
}
i = abi.MapBucketCount - 1
i = abi.OldMapBucketCount - 1
} else {
i--
}
@@ -863,7 +868,6 @@ search:
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
// - github.com/bytedance/sonic
// - github.com/cloudwego/frugal
// - github.com/goccy/go-json
// - github.com/RomiChan/protobuf
// - github.com/segmentio/encoding
@@ -876,7 +880,7 @@ search:
//go:linkname mapiterinit
func mapiterinit(t *maptype, h *hmap, it *hiter) {
if raceenabled && h != nil {
callerpc := getcallerpc()
callerpc := sys.GetCallerPC()
racereadpc(unsafe.Pointer(h), callerpc, abi.FuncPCABIInternal(mapiterinit))
}
@@ -885,10 +889,11 @@ func mapiterinit(t *maptype, h *hmap, it *hiter) {
return
}
if unsafe.Sizeof(hiter{})/goarch.PtrSize != 12 {
if unsafe.Sizeof(hiter{}) != 8+12*goarch.PtrSize {
throw("hash_iter size incorrect") // see cmd/compile/internal/reflectdata/reflect.go
}
it.h = h
it.clearSeq = h.clearSeq
// grab snapshot of bucket state
it.B = h.B
@@ -906,7 +911,7 @@ func mapiterinit(t *maptype, h *hmap, it *hiter) {
// decide where to start
r := uintptr(rand())
it.startBucket = r & bucketMask(h.B)
it.offset = uint8(r >> h.B & (abi.MapBucketCount - 1))
it.offset = uint8(r >> h.B & (abi.OldMapBucketCount - 1))
// iterator state
it.bucket = it.startBucket
@@ -924,7 +929,6 @@ func mapiterinit(t *maptype, h *hmap, it *hiter) {
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
// - github.com/bytedance/sonic
// - github.com/cloudwego/frugal
// - github.com/RomiChan/protobuf
// - github.com/segmentio/encoding
// - github.com/ugorji/go/codec
@@ -937,7 +941,7 @@ func mapiterinit(t *maptype, h *hmap, it *hiter) {
func mapiternext(it *hiter) {
h := it.h
if raceenabled {
callerpc := getcallerpc()
callerpc := sys.GetCallerPC()
racereadpc(unsafe.Pointer(h), callerpc, abi.FuncPCABIInternal(mapiternext))
}
if h.flags&hashWriting != 0 {
@@ -981,8 +985,8 @@ next:
}
i = 0
}
for ; i < abi.MapBucketCount; i++ {
offi := (i + it.offset) & (abi.MapBucketCount - 1)
for ; i < abi.OldMapBucketCount; i++ {
offi := (i + it.offset) & (abi.OldMapBucketCount - 1)
if isEmpty(b.tophash[offi]) || b.tophash[offi] == evacuatedEmpty {
// TODO: emptyRest is hard to use here, as we start iterating
// in the middle of a bucket. It's feasible, just tricky.
@@ -992,7 +996,7 @@ next:
if t.IndirectKey() {
k = *((*unsafe.Pointer)(k))
}
e := add(unsafe.Pointer(b), dataOffset+abi.MapBucketCount*uintptr(t.KeySize)+uintptr(offi)*uintptr(t.ValueSize))
e := add(unsafe.Pointer(b), dataOffset+abi.OldMapBucketCount*uintptr(t.KeySize)+uintptr(offi)*uintptr(t.ValueSize))
if checkBucket != noCheck && !h.sameSizeGrow() {
// Special case: iterator was started during a grow to a larger size
// and the grow is not done yet. We're working on a bucket whose
@@ -1021,8 +1025,9 @@ next:
}
}
}
if (b.tophash[offi] != evacuatedX && b.tophash[offi] != evacuatedY) ||
!(t.ReflexiveKey() || t.Key.Equal(k, k)) {
if it.clearSeq == h.clearSeq &&
((b.tophash[offi] != evacuatedX && b.tophash[offi] != evacuatedY) ||
!(t.ReflexiveKey() || t.Key.Equal(k, k))) {
// This is the golden data, we can return it.
// OR
// key!=key, so the entry can't be deleted or updated, so we can just return it.
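The clearSeq comparison added above backs a user-visible guarantee: entries removed by the clear builtin must not surface later in an in-flight range loop. A minimal standalone sketch of that guarantee (ordinary user code, not part of this change):
package main

import "fmt"

func main() {
	m := map[int]bool{1: true, 2: true, 3: true}
	n := 0
	for range m {
		n++
		// clear bumps the map's clearSeq; an in-flight iterator sees the
		// mismatch and must not yield any of the pre-clear entries.
		clear(m)
	}
	fmt.Println(n, len(m)) // expected output: 1 0
}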
@@ -1062,19 +1067,9 @@ next:
// mapclear deletes all keys from a map.
// It is called by the compiler.
//
// mapclear should be an internal detail,
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
// - github.com/cloudwego/frugal
//
// Do not remove or change the type signature.
// See go.dev/issue/67401.
//
//go:linkname mapclear
func mapclear(t *maptype, h *hmap) {
if raceenabled && h != nil {
callerpc := getcallerpc()
callerpc := sys.GetCallerPC()
pc := abi.FuncPCABIInternal(mapclear)
racewritepc(unsafe.Pointer(h), callerpc, pc)
}
@@ -1088,28 +1083,12 @@ func mapclear(t *maptype, h *hmap) {
}
h.flags ^= hashWriting
// Mark buckets empty, so existing iterators can be terminated, see issue #59411.
markBucketsEmpty := func(bucket unsafe.Pointer, mask uintptr) {
for i := uintptr(0); i <= mask; i++ {
b := (*bmap)(add(bucket, i*uintptr(t.BucketSize)))
for ; b != nil; b = b.overflow(t) {
for i := uintptr(0); i < abi.MapBucketCount; i++ {
b.tophash[i] = emptyRest
}
}
}
}
markBucketsEmpty(h.buckets, bucketMask(h.B))
if oldBuckets := h.oldbuckets; oldBuckets != nil {
markBucketsEmpty(oldBuckets, h.oldbucketmask())
}
h.flags &^= sameSizeGrow
h.oldbuckets = nil
h.nevacuate = 0
h.noverflow = 0
h.count = 0
h.clearSeq++
// Reset the hash seed to make it more difficult for attackers to
// repeatedly trigger hash collisions. See issue 25237.
@@ -1181,7 +1160,7 @@ func hashGrow(t *maptype, h *hmap) {
// overLoadFactor reports whether count items placed in 1<<B buckets is over loadFactor.
func overLoadFactor(count int, B uint8) bool {
return count > abi.MapBucketCount && uintptr(count) > loadFactorNum*(bucketShift(B)/loadFactorDen)
return count > abi.OldMapBucketCount && uintptr(count) > loadFactorNum*(bucketShift(B)/loadFactorDen)
}
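As a worked instance of the check above, assuming an 8-slot bucket (the usual value of abi.OldMapBucketCount): loadFactorNum = 2*8*13/16 = 13, so the map grows once it averages more than 6.5 entries per bucket. A standalone sketch:
package main

import "fmt"

const (
	bucketCnt     = 8 // assumed value of abi.OldMapBucketCount
	loadFactorDen = 2
	loadFactorNum = loadFactorDen * bucketCnt * 13 / 16 // 13
)

// overLoad mirrors the runtime's overLoadFactor check shown above.
func overLoad(count int, B uint8) bool {
	return count > bucketCnt && uintptr(count) > loadFactorNum*((uintptr(1)<<B)/loadFactorDen)
}

func main() {
	fmt.Println(overLoad(52, 3)) // false: 52 = 6.5 * 8 buckets, exactly at the limit
	fmt.Println(overLoad(53, 3)) // true: one more entry triggers growth
}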
// tooManyOverflowBuckets reports whether noverflow buckets is too many for a map with 1<<B buckets.
@@ -1264,7 +1243,7 @@ func evacuate(t *maptype, h *hmap, oldbucket uintptr) {
x := &xy[0]
x.b = (*bmap)(add(h.buckets, oldbucket*uintptr(t.BucketSize)))
x.k = add(unsafe.Pointer(x.b), dataOffset)
x.e = add(x.k, abi.MapBucketCount*uintptr(t.KeySize))
x.e = add(x.k, abi.OldMapBucketCount*uintptr(t.KeySize))
if !h.sameSizeGrow() {
// Only calculate y pointers if we're growing bigger.
@@ -1272,13 +1251,13 @@ func evacuate(t *maptype, h *hmap, oldbucket uintptr) {
y := &xy[1]
y.b = (*bmap)(add(h.buckets, (oldbucket+newbit)*uintptr(t.BucketSize)))
y.k = add(unsafe.Pointer(y.b), dataOffset)
y.e = add(y.k, abi.MapBucketCount*uintptr(t.KeySize))
y.e = add(y.k, abi.OldMapBucketCount*uintptr(t.KeySize))
}
for ; b != nil; b = b.overflow(t) {
k := add(unsafe.Pointer(b), dataOffset)
e := add(k, abi.MapBucketCount*uintptr(t.KeySize))
for i := 0; i < abi.MapBucketCount; i, k, e = i+1, add(k, uintptr(t.KeySize)), add(e, uintptr(t.ValueSize)) {
e := add(k, abi.OldMapBucketCount*uintptr(t.KeySize))
for i := 0; i < abi.OldMapBucketCount; i, k, e = i+1, add(k, uintptr(t.KeySize)), add(e, uintptr(t.ValueSize)) {
top := b.tophash[i]
if isEmpty(top) {
b.tophash[i] = evacuatedEmpty
@@ -1324,13 +1303,13 @@ func evacuate(t *maptype, h *hmap, oldbucket uintptr) {
b.tophash[i] = evacuatedX + useY // evacuatedX + 1 == evacuatedY
dst := &xy[useY] // evacuation destination
if dst.i == abi.MapBucketCount {
if dst.i == abi.OldMapBucketCount {
dst.b = h.newoverflow(t, dst.b)
dst.i = 0
dst.k = add(unsafe.Pointer(dst.b), dataOffset)
dst.e = add(dst.k, abi.MapBucketCount*uintptr(t.KeySize))
dst.e = add(dst.k, abi.OldMapBucketCount*uintptr(t.KeySize))
}
dst.b.tophash[dst.i&(abi.MapBucketCount-1)] = top // mask dst.i as an optimization, to avoid a bounds check
dst.b.tophash[dst.i&(abi.OldMapBucketCount-1)] = top // mask dst.i as an optimization, to avoid a bounds check
if t.IndirectKey() {
*(*unsafe.Pointer)(dst.k) = k2 // copy pointer
} else {
@@ -1411,18 +1390,18 @@ func reflect_makemap(t *maptype, cap int) *hmap {
if t.Key.Equal == nil {
throw("runtime.reflect_makemap: unsupported map key type")
}
if t.Key.Size_ > abi.MapMaxKeyBytes && (!t.IndirectKey() || t.KeySize != uint8(goarch.PtrSize)) ||
t.Key.Size_ <= abi.MapMaxKeyBytes && (t.IndirectKey() || t.KeySize != uint8(t.Key.Size_)) {
if t.Key.Size_ > abi.OldMapMaxKeyBytes && (!t.IndirectKey() || t.KeySize != uint8(goarch.PtrSize)) ||
t.Key.Size_ <= abi.OldMapMaxKeyBytes && (t.IndirectKey() || t.KeySize != uint8(t.Key.Size_)) {
throw("key size wrong")
}
if t.Elem.Size_ > abi.MapMaxElemBytes && (!t.IndirectElem() || t.ValueSize != uint8(goarch.PtrSize)) ||
t.Elem.Size_ <= abi.MapMaxElemBytes && (t.IndirectElem() || t.ValueSize != uint8(t.Elem.Size_)) {
if t.Elem.Size_ > abi.OldMapMaxElemBytes && (!t.IndirectElem() || t.ValueSize != uint8(goarch.PtrSize)) ||
t.Elem.Size_ <= abi.OldMapMaxElemBytes && (t.IndirectElem() || t.ValueSize != uint8(t.Elem.Size_)) {
throw("elem size wrong")
}
if t.Key.Align_ > abi.MapBucketCount {
if t.Key.Align_ > abi.OldMapBucketCount {
throw("key align too big")
}
if t.Elem.Align_ > abi.MapBucketCount {
if t.Elem.Align_ > abi.OldMapBucketCount {
throw("elem align too big")
}
if t.Key.Size_%uintptr(t.Key.Align_) != 0 {
@@ -1431,7 +1410,7 @@ func reflect_makemap(t *maptype, cap int) *hmap {
if t.Elem.Size_%uintptr(t.Elem.Align_) != 0 {
throw("elem size not a multiple of elem align")
}
if abi.MapBucketCount < 8 {
if abi.OldMapBucketCount < 8 {
throw("bucketsize too small for proper alignment")
}
if dataOffset%uintptr(t.Key.Align_) != 0 {
@@ -1537,7 +1516,7 @@ func reflect_mapiternext(it *hiter) {
mapiternext(it)
}
// reflect_mapiterkey is for package reflect,
// reflect_mapiterkey was for package reflect,
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
// - github.com/goccy/go-json
@@ -1551,7 +1530,7 @@ func reflect_mapiterkey(it *hiter) unsafe.Pointer {
return it.key
}
// reflect_mapiterelem is for package reflect,
// reflect_mapiterelem was for package reflect,
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
// - github.com/goccy/go-json
@@ -1580,7 +1559,7 @@ func reflect_maplen(h *hmap) int {
return 0
}
if raceenabled {
callerpc := getcallerpc()
callerpc := sys.GetCallerPC()
racereadpc(unsafe.Pointer(h), callerpc, abi.FuncPCABIInternal(reflect_maplen))
}
return h.count
@@ -1597,7 +1576,7 @@ func reflectlite_maplen(h *hmap) int {
return 0
}
if raceenabled {
callerpc := getcallerpc()
callerpc := sys.GetCallerPC()
racereadpc(unsafe.Pointer(h), callerpc, abi.FuncPCABIInternal(reflect_maplen))
}
return h.count
@@ -1622,26 +1601,26 @@ func mapclone(m any) any {
// moveToBmap moves a bucket from src to dst. It returns the destination bucket (or a new
// destination bucket if the current one overflows) and the pos at which the next key/value
// will be written; pos == bucketCnt means the next write needs to go to an overflow bucket.
func moveToBmap(t *maptype, h *hmap, dst *bmap, pos int, src *bmap) (*bmap, int) {
for i := 0; i < abi.MapBucketCount; i++ {
for i := 0; i < abi.OldMapBucketCount; i++ {
if isEmpty(src.tophash[i]) {
continue
}
for ; pos < abi.MapBucketCount; pos++ {
for ; pos < abi.OldMapBucketCount; pos++ {
if isEmpty(dst.tophash[pos]) {
break
}
}
if pos == abi.MapBucketCount {
if pos == abi.OldMapBucketCount {
dst = h.newoverflow(t, dst)
pos = 0
}
srcK := add(unsafe.Pointer(src), dataOffset+uintptr(i)*uintptr(t.KeySize))
srcEle := add(unsafe.Pointer(src), dataOffset+abi.MapBucketCount*uintptr(t.KeySize)+uintptr(i)*uintptr(t.ValueSize))
srcEle := add(unsafe.Pointer(src), dataOffset+abi.OldMapBucketCount*uintptr(t.KeySize)+uintptr(i)*uintptr(t.ValueSize))
dstK := add(unsafe.Pointer(dst), dataOffset+uintptr(pos)*uintptr(t.KeySize))
dstEle := add(unsafe.Pointer(dst), dataOffset+abi.MapBucketCount*uintptr(t.KeySize)+uintptr(pos)*uintptr(t.ValueSize))
dstEle := add(unsafe.Pointer(dst), dataOffset+abi.OldMapBucketCount*uintptr(t.KeySize)+uintptr(pos)*uintptr(t.ValueSize))
dst.tophash[pos] = src.tophash[i]
if t.IndirectKey() {
@@ -1754,7 +1733,7 @@ func mapclone2(t *maptype, src *hmap) *hmap {
// Process entries one at a time.
for srcBmap != nil {
// move from the old bucket to the new bucket
for i := uintptr(0); i < abi.MapBucketCount; i++ {
for i := uintptr(0); i < abi.OldMapBucketCount; i++ {
if isEmpty(srcBmap.tophash[i]) {
continue
}
@@ -1768,7 +1747,7 @@ func mapclone2(t *maptype, src *hmap) *hmap {
srcK = *((*unsafe.Pointer)(srcK))
}
srcEle := add(unsafe.Pointer(srcBmap), dataOffset+abi.MapBucketCount*uintptr(t.KeySize)+i*uintptr(t.ValueSize))
srcEle := add(unsafe.Pointer(srcBmap), dataOffset+abi.OldMapBucketCount*uintptr(t.KeySize)+i*uintptr(t.ValueSize))
if t.IndirectElem() {
srcEle = *((*unsafe.Pointer)(srcEle))
}
@@ -1794,7 +1773,7 @@ func keys(m any, p unsafe.Pointer) {
}
s := (*slice)(p)
r := int(rand())
offset := uint8(r >> h.B & (abi.MapBucketCount - 1))
offset := uint8(r >> h.B & (abi.OldMapBucketCount - 1))
if h.B == 0 {
copyKeys(t, h, (*bmap)(h.buckets), s, offset)
return
@@ -1823,8 +1802,8 @@ func keys(m any, p unsafe.Pointer) {
func copyKeys(t *maptype, h *hmap, b *bmap, s *slice, offset uint8) {
for b != nil {
for i := uintptr(0); i < abi.MapBucketCount; i++ {
offi := (i + uintptr(offset)) & (abi.MapBucketCount - 1)
for i := uintptr(0); i < abi.OldMapBucketCount; i++ {
offi := (i + uintptr(offset)) & (abi.OldMapBucketCount - 1)
if isEmpty(b.tophash[offi]) {
continue
}
@@ -1857,7 +1836,7 @@ func values(m any, p unsafe.Pointer) {
}
s := (*slice)(p)
r := int(rand())
offset := uint8(r >> h.B & (abi.MapBucketCount - 1))
offset := uint8(r >> h.B & (abi.OldMapBucketCount - 1))
if h.B == 0 {
copyValues(t, h, (*bmap)(h.buckets), s, offset)
return
@@ -1886,8 +1865,8 @@ func values(m any, p unsafe.Pointer) {
func copyValues(t *maptype, h *hmap, b *bmap, s *slice, offset uint8) {
for b != nil {
for i := uintptr(0); i < abi.MapBucketCount; i++ {
offi := (i + uintptr(offset)) & (abi.MapBucketCount - 1)
for i := uintptr(0); i < abi.OldMapBucketCount; i++ {
offi := (i + uintptr(offset)) & (abi.OldMapBucketCount - 1)
if isEmpty(b.tophash[offi]) {
continue
}
@@ -1896,7 +1875,7 @@ func copyValues(t *maptype, h *hmap, b *bmap, s *slice, offset uint8) {
fatal("concurrent map read and map write")
}
ele := add(unsafe.Pointer(b), dataOffset+abi.MapBucketCount*uintptr(t.KeySize)+offi*uintptr(t.ValueSize))
ele := add(unsafe.Pointer(b), dataOffset+abi.OldMapBucketCount*uintptr(t.KeySize)+offi*uintptr(t.ValueSize))
if t.IndirectElem() {
ele = *((*unsafe.Pointer)(ele))
}

View File

@@ -0,0 +1,214 @@
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build !goexperiment.swissmap
package runtime_test
import (
"internal/abi"
"internal/goarch"
"runtime"
"slices"
"testing"
)
func TestHmapSize(t *testing.T) {
// The structure of hmap is defined in runtime/map.go
// and in cmd/compile/internal/reflectdata/map.go and must be in sync.
// The size of hmap should be 56 bytes on 64 bit and 36 bytes on 32 bit platforms.
var hmapSize = uintptr(2*8 + 5*goarch.PtrSize)
if runtime.RuntimeHmapSize != hmapSize {
t.Errorf("sizeof(runtime.hmap{})==%d, want %d", runtime.RuntimeHmapSize, hmapSize)
}
}
func TestLoadFactor(t *testing.T) {
for b := uint8(0); b < 20; b++ {
count := 13 * (1 << b) / 2 // 6.5
if b == 0 {
count = 8
}
if runtime.OverLoadFactor(count, b) {
t.Errorf("OverLoadFactor(%d,%d)=true, want false", count, b)
}
if !runtime.OverLoadFactor(count+1, b) {
t.Errorf("OverLoadFactor(%d,%d)=false, want true", count+1, b)
}
}
}
func TestMapIterOrder(t *testing.T) {
sizes := []int{3, 7, 9, 15}
if abi.OldMapBucketCountBits >= 5 {
// it gets flaky (often only one iteration order) at size 3 when abi.MapBucketCountBits >=5.
t.Fatalf("This test becomes flaky if abi.MapBucketCountBits(=%d) is 5 or larger", abi.OldMapBucketCountBits)
}
for _, n := range sizes {
for i := 0; i < 1000; i++ {
// Make m be {0: true, 1: true, ..., n-1: true}.
m := make(map[int]bool)
for i := 0; i < n; i++ {
m[i] = true
}
// Check that iterating over the map produces at least two different orderings.
ord := func() []int {
var s []int
for key := range m {
s = append(s, key)
}
return s
}
first := ord()
ok := false
for try := 0; try < 100; try++ {
if !slices.Equal(first, ord()) {
ok = true
break
}
}
if !ok {
t.Errorf("Map with n=%d elements had consistent iteration order: %v", n, first)
break
}
}
}
}
const bs = abi.OldMapBucketCount
// belowOverflow should be a pretty-full pair of buckets;
// atOverflow is 1/8 bs larger = 13/8 buckets or two buckets
// that are 13/16 full each, which is the overflow boundary.
// Adding one to that should ensure overflow to the next higher size.
const (
belowOverflow = bs * 3 / 2 // 1.5 bs = 2 buckets @ 75%
atOverflow = belowOverflow + bs/8 // 2 buckets at 13/16 fill.
)
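Concretely, with an 8-slot bucket these work out to belowOverflow = 12 and atOverflow = 13, the values the table below exercises. A standalone sketch of the arithmetic (bucket size assumed):
package main

import "fmt"

func main() {
	const bs = 8                            // assumed abi.OldMapBucketCount
	const belowOverflow = bs * 3 / 2        // 12: two buckets at 75%
	const atOverflow = belowOverflow + bs/8 // 13: two buckets at 13/16 fill
	fmt.Println(belowOverflow, atOverflow, atOverflow+1) // 12 13 14; 14 entries overflow to 4 buckets
}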
var mapBucketTests = [...]struct {
n int // n is the number of map elements
noescape int // number of expected buckets for non-escaping map
escape int // number of expected buckets for escaping map
}{
{-(1 << 30), 1, 1},
{-1, 1, 1},
{0, 1, 1},
{1, 1, 1},
{bs, 1, 1},
{bs + 1, 2, 2},
{belowOverflow, 2, 2}, // 1.5 bs = 2 buckets @ 75%
{atOverflow + 1, 4, 4}, // 13/8 bs + 1 == overflow to 4
{2 * belowOverflow, 4, 4}, // 3 bs = 4 buckets @75%
{2*atOverflow + 1, 8, 8}, // 13/4 bs + 1 = overflow to 8
{4 * belowOverflow, 8, 8}, // 6 bs = 8 buckets @ 75%
{4*atOverflow + 1, 16, 16}, // 13/2 bs + 1 = overflow to 16
}
func TestMapBuckets(t *testing.T) {
// Test that maps of different sizes have the right number of buckets.
// Non-escaping maps with small buckets (like map[int]int) never
// have a nil bucket pointer due to starting with preallocated buckets
// on the stack. Escaping maps start with a non-nil bucket pointer if
// hint size is above bucketCnt and thereby have more than one bucket.
// These tests depend on bucketCnt and loadFactor* in map.go.
t.Run("mapliteral", func(t *testing.T) {
for _, tt := range mapBucketTests {
localMap := map[int]int{}
if runtime.MapBucketsPointerIsNil(localMap) {
t.Errorf("no escape: buckets pointer is nil for non-escaping map")
}
for i := 0; i < tt.n; i++ {
localMap[i] = i
}
if got := runtime.MapBucketsCount(localMap); got != tt.noescape {
t.Errorf("no escape: n=%d want %d buckets, got %d", tt.n, tt.noescape, got)
}
escapingMap := runtime.Escape(map[int]int{})
if count := runtime.MapBucketsCount(escapingMap); count > 1 && runtime.MapBucketsPointerIsNil(escapingMap) {
t.Errorf("escape: buckets pointer is nil for n=%d buckets", count)
}
for i := 0; i < tt.n; i++ {
escapingMap[i] = i
}
if got := runtime.MapBucketsCount(escapingMap); got != tt.escape {
t.Errorf("escape n=%d want %d buckets, got %d", tt.n, tt.escape, got)
}
}
})
t.Run("nohint", func(t *testing.T) {
for _, tt := range mapBucketTests {
localMap := make(map[int]int)
if runtime.MapBucketsPointerIsNil(localMap) {
t.Errorf("no escape: buckets pointer is nil for non-escaping map")
}
for i := 0; i < tt.n; i++ {
localMap[i] = i
}
if got := runtime.MapBucketsCount(localMap); got != tt.noescape {
t.Errorf("no escape: n=%d want %d buckets, got %d", tt.n, tt.noescape, got)
}
escapingMap := runtime.Escape(make(map[int]int))
if count := runtime.MapBucketsCount(escapingMap); count > 1 && runtime.MapBucketsPointerIsNil(escapingMap) {
t.Errorf("escape: buckets pointer is nil for n=%d buckets", count)
}
for i := 0; i < tt.n; i++ {
escapingMap[i] = i
}
if got := runtime.MapBucketsCount(escapingMap); got != tt.escape {
t.Errorf("escape: n=%d want %d buckets, got %d", tt.n, tt.escape, got)
}
}
})
t.Run("makemap", func(t *testing.T) {
for _, tt := range mapBucketTests {
localMap := make(map[int]int, tt.n)
if runtime.MapBucketsPointerIsNil(localMap) {
t.Errorf("no escape: buckets pointer is nil for non-escaping map")
}
for i := 0; i < tt.n; i++ {
localMap[i] = i
}
if got := runtime.MapBucketsCount(localMap); got != tt.noescape {
t.Errorf("no escape: n=%d want %d buckets, got %d", tt.n, tt.noescape, got)
}
escapingMap := runtime.Escape(make(map[int]int, tt.n))
if count := runtime.MapBucketsCount(escapingMap); count > 1 && runtime.MapBucketsPointerIsNil(escapingMap) {
t.Errorf("escape: buckets pointer is nil for n=%d buckets", count)
}
for i := 0; i < tt.n; i++ {
escapingMap[i] = i
}
if got := runtime.MapBucketsCount(escapingMap); got != tt.escape {
t.Errorf("escape: n=%d want %d buckets, got %d", tt.n, tt.escape, got)
}
}
})
t.Run("makemap64", func(t *testing.T) {
for _, tt := range mapBucketTests {
localMap := make(map[int]int, int64(tt.n))
if runtime.MapBucketsPointerIsNil(localMap) {
t.Errorf("no escape: buckets pointer is nil for non-escaping map")
}
for i := 0; i < tt.n; i++ {
localMap[i] = i
}
if got := runtime.MapBucketsCount(localMap); got != tt.noescape {
t.Errorf("no escape: n=%d want %d buckets, got %d", tt.n, tt.noescape, got)
}
escapingMap := runtime.Escape(make(map[int]int, tt.n))
if count := runtime.MapBucketsCount(escapingMap); count > 1 && runtime.MapBucketsPointerIsNil(escapingMap) {
t.Errorf("escape: buckets pointer is nil for n=%d buckets", count)
}
for i := 0; i < tt.n; i++ {
escapingMap[i] = i
}
if got := runtime.MapBucketsCount(escapingMap); got != tt.escape {
t.Errorf("escape: n=%d want %d buckets, got %d", tt.n, tt.escape, got)
}
}
})
}

src/runtime/map_swiss.go
View File

@@ -0,0 +1,363 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.swissmap
package runtime
import (
"internal/abi"
"internal/runtime/maps"
"internal/runtime/sys"
"unsafe"
)
const (
// TODO: remove? These are used by tests but not the actual map
loadFactorNum = 7
loadFactorDen = 8
)
type maptype = abi.SwissMapType
//go:linkname maps_errNilAssign internal/runtime/maps.errNilAssign
var maps_errNilAssign error = plainError("assignment to entry in nil map")
//go:linkname maps_mapKeyError internal/runtime/maps.mapKeyError
func maps_mapKeyError(t *abi.SwissMapType, p unsafe.Pointer) error {
return mapKeyError(t, p)
}
func makemap64(t *abi.SwissMapType, hint int64, m *maps.Map) *maps.Map {
if int64(int(hint)) != hint {
hint = 0
}
return makemap(t, int(hint), m)
}
// makemap_small implements Go map creation for make(map[k]v) and
// make(map[k]v, hint) when hint is known to be at most abi.SwissMapGroupSlots
// at compile time and the map needs to be allocated on the heap.
//
// makemap_small should be an internal detail,
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
// - github.com/bytedance/sonic
//
// Do not remove or change the type signature.
// See go.dev/issue/67401.
//
//go:linkname makemap_small
func makemap_small() *maps.Map {
return maps.NewEmptyMap()
}
// makemap implements Go map creation for make(map[k]v, hint).
// If the compiler has determined that the map or the first group
// can be created on the stack, m and optionally m.dirPtr may be non-nil.
// If m != nil, the map can be created directly in m.
// If m.dirPtr != nil, it points to a group usable for a small map.
//
// makemap should be an internal detail,
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
// - github.com/ugorji/go/codec
//
// Do not remove or change the type signature.
// See go.dev/issue/67401.
//
//go:linkname makemap
func makemap(t *abi.SwissMapType, hint int, m *maps.Map) *maps.Map {
if hint < 0 {
hint = 0
}
return maps.NewMap(t, uintptr(hint), m, maxAlloc)
}
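For orientation, ordinary map creation is what reaches these entry points; a user-level sketch, assuming the usual lowering of make to makemap_small/makemap:
package main

import "fmt"

func main() {
	// A make with a sizable hint is lowered to runtime.makemap; negative or
	// non-representable hints are clamped to 0, as makemap/makemap64 above do.
	m := make(map[string]int, 100)
	m["a"] = 1
	fmt.Println(len(m)) // 1
}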
// mapaccess1 returns a pointer to h[key]. Never returns nil, instead
// it will return a reference to the zero object for the elem type if
// the key is not in the map.
// NOTE: The returned pointer may keep the whole map live, so don't
// hold onto it for very long.
//
// mapaccess1 is pushed from internal/runtime/maps. We could just call it, but
// we want to avoid one layer of call.
//
//go:linkname mapaccess1
func mapaccess1(t *abi.SwissMapType, m *maps.Map, key unsafe.Pointer) unsafe.Pointer
// mapaccess2 should be an internal detail,
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
// - github.com/ugorji/go/codec
//
// Do not remove or change the type signature.
// See go.dev/issue/67401.
//
//go:linkname mapaccess2
func mapaccess2(t *abi.SwissMapType, m *maps.Map, key unsafe.Pointer) (unsafe.Pointer, bool)
func mapaccess1_fat(t *abi.SwissMapType, m *maps.Map, key, zero unsafe.Pointer) unsafe.Pointer {
e := mapaccess1(t, m, key)
if e == unsafe.Pointer(&zeroVal[0]) {
return zero
}
return e
}
func mapaccess2_fat(t *abi.SwissMapType, m *maps.Map, key, zero unsafe.Pointer) (unsafe.Pointer, bool) {
e := mapaccess1(t, m, key)
if e == unsafe.Pointer(&zeroVal[0]) {
return zero, false
}
return e, true
}
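The one-result and comma-ok access forms correspond to the two entry points above (the _fat wrappers just substitute a caller-provided zero value on a miss); a user-level sketch, assuming the usual lowering:
package main

import "fmt"

func main() {
	m := map[string]int{"a": 1}
	v := m["missing"]     // plain access: mapaccess1, zero value on a miss
	w, ok := m["missing"] // comma-ok access: mapaccess2
	fmt.Println(v, w, ok) // 0 0 false
}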
// mapassign is pushed from internal/runtime/maps. We could just call it, but
// we want to avoid one layer of call.
//
// mapassign should be an internal detail,
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
// - github.com/bytedance/sonic
// - github.com/RomiChan/protobuf
// - github.com/segmentio/encoding
// - github.com/ugorji/go/codec
//
// Do not remove or change the type signature.
// See go.dev/issue/67401.
//
//go:linkname mapassign
func mapassign(t *abi.SwissMapType, m *maps.Map, key unsafe.Pointer) unsafe.Pointer
// mapdelete should be an internal detail,
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
// - github.com/ugorji/go/codec
//
// Do not remove or change the type signature.
// See go.dev/issue/67401.
//
//go:linkname mapdelete
func mapdelete(t *abi.SwissMapType, m *maps.Map, key unsafe.Pointer) {
if raceenabled && m != nil {
callerpc := sys.GetCallerPC()
pc := abi.FuncPCABIInternal(mapdelete)
racewritepc(unsafe.Pointer(m), callerpc, pc)
raceReadObjectPC(t.Key, key, callerpc, pc)
}
if msanenabled && m != nil {
msanread(key, t.Key.Size_)
}
if asanenabled && m != nil {
asanread(key, t.Key.Size_)
}
m.Delete(t, key)
}
// mapIterStart initializes the Iter struct used for ranging over maps and
// performs the first step of iteration. The Iter struct pointed to by 'it' is
// allocated on the stack by the compiler's order pass or on the heap by
// reflect. Both need to have zeroed it since the struct contains pointers.
func mapIterStart(t *abi.SwissMapType, m *maps.Map, it *maps.Iter) {
if raceenabled && m != nil {
callerpc := sys.GetCallerPC()
racereadpc(unsafe.Pointer(m), callerpc, abi.FuncPCABIInternal(mapIterStart))
}
it.Init(t, m)
it.Next()
}
// mapIterNext performs the next step of iteration. Afterwards, the next
// key/elem are in it.Key()/it.Elem().
func mapIterNext(it *maps.Iter) {
if raceenabled {
callerpc := sys.GetCallerPC()
racereadpc(unsafe.Pointer(it.Map()), callerpc, abi.FuncPCABIInternal(mapIterNext))
}
it.Next()
}
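These two hooks back the range statement itself; a user-level sketch, assuming the usual lowering described in the comments above:
package main

import "fmt"

func main() {
	m := map[string]int{"a": 1, "b": 2}
	// The compiler rewrites this loop into one mapIterStart call followed by
	// repeated mapIterNext calls against a zeroed, stack-allocated maps.Iter.
	for k, v := range m {
		fmt.Println(k, v)
	}
}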
// mapclear deletes all keys from a map.
func mapclear(t *abi.SwissMapType, m *maps.Map) {
if raceenabled && m != nil {
callerpc := sys.GetCallerPC()
pc := abi.FuncPCABIInternal(mapclear)
racewritepc(unsafe.Pointer(m), callerpc, pc)
}
m.Clear(t)
}
// Reflect stubs. Called from ../reflect/asm_*.s
// reflect_makemap is for package reflect,
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
// - gitee.com/quant1x/gox
// - github.com/modern-go/reflect2
// - github.com/goccy/go-json
// - github.com/RomiChan/protobuf
// - github.com/segmentio/encoding
// - github.com/v2pro/plz
//
// Do not remove or change the type signature.
// See go.dev/issue/67401.
//
//go:linkname reflect_makemap reflect.makemap
func reflect_makemap(t *abi.SwissMapType, cap int) *maps.Map {
// Check invariants and reflect's math.
if t.Key.Equal == nil {
throw("runtime.reflect_makemap: unsupported map key type")
}
// TODO: other checks
return makemap(t, cap, nil)
}
// reflect_mapaccess is for package reflect,
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
// - gitee.com/quant1x/gox
// - github.com/modern-go/reflect2
// - github.com/v2pro/plz
//
// Do not remove or change the type signature.
// See go.dev/issue/67401.
//
//go:linkname reflect_mapaccess reflect.mapaccess
func reflect_mapaccess(t *abi.SwissMapType, m *maps.Map, key unsafe.Pointer) unsafe.Pointer {
elem, ok := mapaccess2(t, m, key)
if !ok {
// reflect wants nil for a missing element
elem = nil
}
return elem
}
//go:linkname reflect_mapaccess_faststr reflect.mapaccess_faststr
func reflect_mapaccess_faststr(t *abi.SwissMapType, m *maps.Map, key string) unsafe.Pointer {
elem, ok := mapaccess2_faststr(t, m, key)
if !ok {
// reflect wants nil for a missing element
elem = nil
}
return elem
}
// reflect_mapassign is for package reflect,
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
// - gitee.com/quant1x/gox
// - github.com/v2pro/plz
//
// Do not remove or change the type signature.
//
//go:linkname reflect_mapassign reflect.mapassign0
func reflect_mapassign(t *abi.SwissMapType, m *maps.Map, key unsafe.Pointer, elem unsafe.Pointer) {
p := mapassign(t, m, key)
typedmemmove(t.Elem, p, elem)
}
//go:linkname reflect_mapassign_faststr reflect.mapassign_faststr0
func reflect_mapassign_faststr(t *abi.SwissMapType, m *maps.Map, key string, elem unsafe.Pointer) {
p := mapassign_faststr(t, m, key)
typedmemmove(t.Elem, p, elem)
}
//go:linkname reflect_mapdelete reflect.mapdelete
func reflect_mapdelete(t *abi.SwissMapType, m *maps.Map, key unsafe.Pointer) {
mapdelete(t, m, key)
}
//go:linkname reflect_mapdelete_faststr reflect.mapdelete_faststr
func reflect_mapdelete_faststr(t *abi.SwissMapType, m *maps.Map, key string) {
mapdelete_faststr(t, m, key)
}
// reflect_maplen is for package reflect,
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
// - github.com/goccy/go-json
// - github.com/wI2L/jettison
//
// Do not remove or change the type signature.
// See go.dev/issue/67401.
//
//go:linkname reflect_maplen reflect.maplen
func reflect_maplen(m *maps.Map) int {
if m == nil {
return 0
}
if raceenabled {
callerpc := sys.GetCallerPC()
racereadpc(unsafe.Pointer(m), callerpc, abi.FuncPCABIInternal(reflect_maplen))
}
return int(m.Used())
}
//go:linkname reflect_mapclear reflect.mapclear
func reflect_mapclear(t *abi.SwissMapType, m *maps.Map) {
mapclear(t, m)
}
//go:linkname reflectlite_maplen internal/reflectlite.maplen
func reflectlite_maplen(m *maps.Map) int {
if m == nil {
return 0
}
if raceenabled {
callerpc := sys.GetCallerPC()
racereadpc(unsafe.Pointer(m), callerpc, abi.FuncPCABIInternal(reflect_maplen))
}
return int(m.Used())
}
// mapinitnoop is a no-op function known to the Go linker; if a given global
// map (of the right size) is determined to be dead, the linker will
// rewrite the relocation (from the package init func) from the outlined
// map init function to this symbol. Defined in assembly so as to avoid
// complications with instrumentation (coverage, etc).
func mapinitnoop()
// mapclone for implementing maps.Clone
//
//go:linkname mapclone maps.clone
func mapclone(m any) any {
e := efaceOf(&m)
e.data = unsafe.Pointer(mapclone2((*abi.SwissMapType)(unsafe.Pointer(e._type)), (*maps.Map)(e.data)))
return m
}
func mapclone2(t *abi.SwissMapType, src *maps.Map) *maps.Map {
dst := makemap(t, int(src.Used()), nil)
var iter maps.Iter
iter.Init(t, src)
for iter.Next(); iter.Key() != nil; iter.Next() {
dst.Put(t, iter.Key(), iter.Elem())
}
return dst
}
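The go:linkname above is what wires this into the standard library's maps package; a usage sketch of the path that ends in mapclone2:
package main

import (
	"fmt"
	"maps"
)

func main() {
	src := map[string]int{"a": 1, "b": 2}
	dst := maps.Clone(src) // backed by runtime.mapclone via the linkname above
	dst["a"] = 42
	fmt.Println(src["a"], dst["a"]) // 1 42: the clone is independent of the source
}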
// keys for implementing maps.keys
//
//go:linkname keys maps.keys
func keys(m any, p unsafe.Pointer) {
// Currently unused in the maps package.
panic("unimplemented")
}
// values for implementing maps.values
//
//go:linkname values maps.values
func values(m any, p unsafe.Pointer) {
// Currently unused in the maps package.
panic("unimplemented")
}

View File

@@ -0,0 +1,75 @@
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.swissmap
package runtime_test
import (
"internal/abi"
"internal/goarch"
"internal/runtime/maps"
"slices"
"testing"
"unsafe"
)
func TestHmapSize(t *testing.T) {
// The structure of Map is defined in internal/runtime/maps/map.go
// and in cmd/compile/internal/reflectdata/map_swiss.go and must be in sync.
// The size of Map should be 48 bytes on 64 bit and 32 bytes on 32 bit platforms.
wantSize := uintptr(2*8 + 4*goarch.PtrSize)
gotSize := unsafe.Sizeof(maps.Map{})
if gotSize != wantSize {
t.Errorf("sizeof(maps.Map{})==%d, want %d", gotSize, wantSize)
}
}
// See also reflect_test.TestGroupSizeZero.
func TestGroupSizeZero(t *testing.T) {
var m map[struct{}]struct{}
mTyp := abi.TypeOf(m)
mt := (*abi.SwissMapType)(unsafe.Pointer(mTyp))
// internal/runtime/maps may create pointers to slots, even if the slots
// are size 0. The compiler should have reserved an extra word to
// ensure that pointers to the zero-size type at the end of group are
// valid.
if mt.Group.Size() <= 8 {
t.Errorf("Group size got %d want >8", mt.Group.Size())
}
}
func TestMapIterOrder(t *testing.T) {
sizes := []int{3, 7, 9, 15}
for _, n := range sizes {
for i := 0; i < 1000; i++ {
// Make m be {0: true, 1: true, ..., n-1: true}.
m := make(map[int]bool)
for i := 0; i < n; i++ {
m[i] = true
}
// Check that iterating over the map produces at least two different orderings.
ord := func() []int {
var s []int
for key := range m {
s = append(s, key)
}
return s
}
first := ord()
ok := false
for try := 0; try < 100; try++ {
if !slices.Equal(first, ord()) {
ok = true
break
}
}
if !ok {
t.Errorf("Map with n=%d elements had consistent iteration order: %v", n, first)
break
}
}
}
}

View File

@@ -6,8 +6,7 @@ package runtime_test
import (
"fmt"
"internal/abi"
"internal/goarch"
"internal/goexperiment"
"internal/testenv"
"math"
"os"
@@ -21,17 +20,6 @@ import (
"unsafe"
)
func TestHmapSize(t *testing.T) {
// The structure of hmap is defined in runtime/map.go
// and in cmd/compile/internal/gc/reflect.go and must be in sync.
// The size of hmap should be 48 bytes on 64 bit and 28 bytes on 32 bit platforms.
var hmapSize = uintptr(8 + 5*goarch.PtrSize)
if runtime.RuntimeHmapSize != hmapSize {
t.Errorf("sizeof(runtime.hmap{})==%d, want %d", runtime.RuntimeHmapSize, hmapSize)
}
}
// negative zero is a good test because:
// 1. 0 and -0 are equal, yet have distinct representations.
// 2. 0 is represented as all zeros, -0 isn't.
@@ -144,7 +132,7 @@ func TestMapAppendAssignment(t *testing.T) {
m[0] = append(m[0], a...)
want := []int{12345, 67890, 123, 456, 7, 8, 9, 0}
if got := m[0]; !reflect.DeepEqual(got, want) {
if got := m[0]; !slices.Equal(got, want) {
t.Errorf("got %v, want %v", got, want)
}
}
@@ -431,6 +419,12 @@ func TestEmptyKeyAndValue(t *testing.T) {
if len(a) != 1 {
t.Errorf("empty value insert problem")
}
if len(b) != 1 {
t.Errorf("empty key insert problem")
}
if len(c) != 1 {
t.Errorf("empty key+value insert problem")
}
if b[empty{}] != 1 {
t.Errorf("empty key returned wrong value")
}
@@ -509,43 +503,6 @@ func TestMapNanGrowIterator(t *testing.T) {
}
}
func TestMapIterOrder(t *testing.T) {
sizes := []int{3, 7, 9, 15}
if abi.MapBucketCountBits >= 5 {
// it gets flaky (often only one iteration order) at size 3 when abi.MapBucketCountBits >=5.
t.Fatalf("This test becomes flaky if abi.MapBucketCountBits(=%d) is 5 or larger", abi.MapBucketCountBits)
}
for _, n := range sizes {
for i := 0; i < 1000; i++ {
// Make m be {0: true, 1: true, ..., n-1: true}.
m := make(map[int]bool)
for i := 0; i < n; i++ {
m[i] = true
}
// Check that iterating over the map produces at least two different orderings.
ord := func() []int {
var s []int
for key := range m {
s = append(s, key)
}
return s
}
first := ord()
ok := false
for try := 0; try < 100; try++ {
if !reflect.DeepEqual(first, ord()) {
ok = true
break
}
}
if !ok {
t.Errorf("Map with n=%d elements had consistent iteration order: %v", n, first)
break
}
}
}
}
// Issue 8410
func TestMapSparseIterOrder(t *testing.T) {
// Run several rounds to increase the probability
@@ -582,6 +539,38 @@ NextRound:
}
}
// Map iteration must not return duplicate entries.
func TestMapIterDuplicate(t *testing.T) {
// Run several rounds to increase the probability
// of failure. One is not enough.
for range 1000 {
m := make(map[int]bool)
// Add 1000 items, remove 980.
for i := 0; i < 1000; i++ {
m[i] = true
}
for i := 20; i < 1000; i++ {
delete(m, i)
}
var want []int
for i := 0; i < 20; i++ {
want = append(want, i)
}
var got []int
for i := range m {
got = append(got, i)
}
slices.Sort(got)
if !reflect.DeepEqual(got, want) {
t.Errorf("iteration got %v want %v\n", got, want)
}
}
}
func TestMapStringBytesLookup(t *testing.T) {
// Use large string keys to avoid small-allocation coalescing,
// which can cause AllocsPerRun to report lower counts than it should.
@@ -682,165 +671,6 @@ func TestIgnoreBogusMapHint(t *testing.T) {
}
}
const bs = abi.MapBucketCount
// belowOverflow should be a pretty-full pair of buckets;
// atOverflow is 1/8 bs larger = 13/8 buckets or two buckets
// that are 13/16 full each, which is the overflow boundary.
// Adding one to that should ensure overflow to the next higher size.
const (
belowOverflow = bs * 3 / 2 // 1.5 bs = 2 buckets @ 75%
atOverflow = belowOverflow + bs/8 // 2 buckets at 13/16 fill.
)
var mapBucketTests = [...]struct {
n int // n is the number of map elements
noescape int // number of expected buckets for non-escaping map
escape int // number of expected buckets for escaping map
}{
{-(1 << 30), 1, 1},
{-1, 1, 1},
{0, 1, 1},
{1, 1, 1},
{bs, 1, 1},
{bs + 1, 2, 2},
{belowOverflow, 2, 2}, // 1.5 bs = 2 buckets @ 75%
{atOverflow + 1, 4, 4}, // 13/8 bs + 1 == overflow to 4
{2 * belowOverflow, 4, 4}, // 3 bs = 4 buckets @75%
{2*atOverflow + 1, 8, 8}, // 13/4 bs + 1 = overflow to 8
{4 * belowOverflow, 8, 8}, // 6 bs = 8 buckets @ 75%
{4*atOverflow + 1, 16, 16}, // 13/2 bs + 1 = overflow to 16
}
func TestMapBuckets(t *testing.T) {
// Test that maps of different sizes have the right number of buckets.
// Non-escaping maps with small buckets (like map[int]int) never
// have a nil bucket pointer due to starting with preallocated buckets
// on the stack. Escaping maps start with a non-nil bucket pointer if
// hint size is above bucketCnt and thereby have more than one bucket.
// These tests depend on bucketCnt and loadFactor* in map.go.
t.Run("mapliteral", func(t *testing.T) {
for _, tt := range mapBucketTests {
localMap := map[int]int{}
if runtime.MapBucketsPointerIsNil(localMap) {
t.Errorf("no escape: buckets pointer is nil for non-escaping map")
}
for i := 0; i < tt.n; i++ {
localMap[i] = i
}
if got := runtime.MapBucketsCount(localMap); got != tt.noescape {
t.Errorf("no escape: n=%d want %d buckets, got %d", tt.n, tt.noescape, got)
}
escapingMap := runtime.Escape(map[int]int{})
if count := runtime.MapBucketsCount(escapingMap); count > 1 && runtime.MapBucketsPointerIsNil(escapingMap) {
t.Errorf("escape: buckets pointer is nil for n=%d buckets", count)
}
for i := 0; i < tt.n; i++ {
escapingMap[i] = i
}
if got := runtime.MapBucketsCount(escapingMap); got != tt.escape {
t.Errorf("escape n=%d want %d buckets, got %d", tt.n, tt.escape, got)
}
}
})
t.Run("nohint", func(t *testing.T) {
for _, tt := range mapBucketTests {
localMap := make(map[int]int)
if runtime.MapBucketsPointerIsNil(localMap) {
t.Errorf("no escape: buckets pointer is nil for non-escaping map")
}
for i := 0; i < tt.n; i++ {
localMap[i] = i
}
if got := runtime.MapBucketsCount(localMap); got != tt.noescape {
t.Errorf("no escape: n=%d want %d buckets, got %d", tt.n, tt.noescape, got)
}
escapingMap := runtime.Escape(make(map[int]int))
if count := runtime.MapBucketsCount(escapingMap); count > 1 && runtime.MapBucketsPointerIsNil(escapingMap) {
t.Errorf("escape: buckets pointer is nil for n=%d buckets", count)
}
for i := 0; i < tt.n; i++ {
escapingMap[i] = i
}
if got := runtime.MapBucketsCount(escapingMap); got != tt.escape {
t.Errorf("escape: n=%d want %d buckets, got %d", tt.n, tt.escape, got)
}
}
})
t.Run("makemap", func(t *testing.T) {
for _, tt := range mapBucketTests {
localMap := make(map[int]int, tt.n)
if runtime.MapBucketsPointerIsNil(localMap) {
t.Errorf("no escape: buckets pointer is nil for non-escaping map")
}
for i := 0; i < tt.n; i++ {
localMap[i] = i
}
if got := runtime.MapBucketsCount(localMap); got != tt.noescape {
t.Errorf("no escape: n=%d want %d buckets, got %d", tt.n, tt.noescape, got)
}
escapingMap := runtime.Escape(make(map[int]int, tt.n))
if count := runtime.MapBucketsCount(escapingMap); count > 1 && runtime.MapBucketsPointerIsNil(escapingMap) {
t.Errorf("escape: buckets pointer is nil for n=%d buckets", count)
}
for i := 0; i < tt.n; i++ {
escapingMap[i] = i
}
if got := runtime.MapBucketsCount(escapingMap); got != tt.escape {
t.Errorf("escape: n=%d want %d buckets, got %d", tt.n, tt.escape, got)
}
}
})
t.Run("makemap64", func(t *testing.T) {
for _, tt := range mapBucketTests {
localMap := make(map[int]int, int64(tt.n))
if runtime.MapBucketsPointerIsNil(localMap) {
t.Errorf("no escape: buckets pointer is nil for non-escaping map")
}
for i := 0; i < tt.n; i++ {
localMap[i] = i
}
if got := runtime.MapBucketsCount(localMap); got != tt.noescape {
t.Errorf("no escape: n=%d want %d buckets, got %d", tt.n, tt.noescape, got)
}
escapingMap := runtime.Escape(make(map[int]int, tt.n))
if count := runtime.MapBucketsCount(escapingMap); count > 1 && runtime.MapBucketsPointerIsNil(escapingMap) {
t.Errorf("escape: buckets pointer is nil for n=%d buckets", count)
}
for i := 0; i < tt.n; i++ {
escapingMap[i] = i
}
if got := runtime.MapBucketsCount(escapingMap); got != tt.escape {
t.Errorf("escape: n=%d want %d buckets, got %d", tt.n, tt.escape, got)
}
}
})
}
func benchmarkMapPop(b *testing.B, n int) {
m := map[int]int{}
for i := 0; i < b.N; i++ {
for j := 0; j < n; j++ {
m[j] = j
}
for j := 0; j < n; j++ {
// Use iterator to pop an element.
// We want this to be fast, see issue 8412.
for k := range m {
delete(m, k)
break
}
}
}
}
func BenchmarkMapPop100(b *testing.B) { benchmarkMapPop(b, 100) }
func BenchmarkMapPop1000(b *testing.B) { benchmarkMapPop(b, 1000) }
func BenchmarkMapPop10000(b *testing.B) { benchmarkMapPop(b, 10000) }
var testNonEscapingMapVariable int = 8
func TestNonEscapingMap(t *testing.T) {
@@ -849,224 +679,32 @@ func TestNonEscapingMap(t *testing.T) {
m[0] = 0
})
if n != 0 {
t.Fatalf("mapliteral: want 0 allocs, got %v", n)
t.Errorf("mapliteral: want 0 allocs, got %v", n)
}
n = testing.AllocsPerRun(1000, func() {
m := make(map[int]int)
m[0] = 0
})
if n != 0 {
t.Fatalf("no hint: want 0 allocs, got %v", n)
t.Errorf("no hint: want 0 allocs, got %v", n)
}
n = testing.AllocsPerRun(1000, func() {
m := make(map[int]int, 8)
m[0] = 0
})
if n != 0 {
t.Fatalf("with small hint: want 0 allocs, got %v", n)
t.Errorf("with small hint: want 0 allocs, got %v", n)
}
n = testing.AllocsPerRun(1000, func() {
m := make(map[int]int, testNonEscapingMapVariable)
m[0] = 0
})
if n != 0 {
t.Fatalf("with variable hint: want 0 allocs, got %v", n)
t.Errorf("with variable hint: want 0 allocs, got %v", n)
}
}
func benchmarkMapAssignInt32(b *testing.B, n int) {
a := make(map[int32]int)
for i := 0; i < b.N; i++ {
a[int32(i&(n-1))] = i
}
}
func benchmarkMapOperatorAssignInt32(b *testing.B, n int) {
a := make(map[int32]int)
for i := 0; i < b.N; i++ {
a[int32(i&(n-1))] += i
}
}
func benchmarkMapAppendAssignInt32(b *testing.B, n int) {
a := make(map[int32][]int)
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
key := int32(i & (n - 1))
a[key] = append(a[key], i)
}
}
func benchmarkMapDeleteInt32(b *testing.B, n int) {
a := make(map[int32]int, n)
b.ResetTimer()
for i := 0; i < b.N; i++ {
if len(a) == 0 {
b.StopTimer()
for j := i; j < i+n; j++ {
a[int32(j)] = j
}
b.StartTimer()
}
delete(a, int32(i))
}
}
func benchmarkMapAssignInt64(b *testing.B, n int) {
a := make(map[int64]int)
for i := 0; i < b.N; i++ {
a[int64(i&(n-1))] = i
}
}
func benchmarkMapOperatorAssignInt64(b *testing.B, n int) {
a := make(map[int64]int)
for i := 0; i < b.N; i++ {
a[int64(i&(n-1))] += i
}
}
func benchmarkMapAppendAssignInt64(b *testing.B, n int) {
a := make(map[int64][]int)
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
key := int64(i & (n - 1))
a[key] = append(a[key], i)
}
}
func benchmarkMapDeleteInt64(b *testing.B, n int) {
a := make(map[int64]int, n)
b.ResetTimer()
for i := 0; i < b.N; i++ {
if len(a) == 0 {
b.StopTimer()
for j := i; j < i+n; j++ {
a[int64(j)] = j
}
b.StartTimer()
}
delete(a, int64(i))
}
}
func benchmarkMapAssignStr(b *testing.B, n int) {
k := make([]string, n)
for i := 0; i < len(k); i++ {
k[i] = strconv.Itoa(i)
}
b.ResetTimer()
a := make(map[string]int)
for i := 0; i < b.N; i++ {
a[k[i&(n-1)]] = i
}
}
func benchmarkMapOperatorAssignStr(b *testing.B, n int) {
k := make([]string, n)
for i := 0; i < len(k); i++ {
k[i] = strconv.Itoa(i)
}
b.ResetTimer()
a := make(map[string]string)
for i := 0; i < b.N; i++ {
key := k[i&(n-1)]
a[key] += key
}
}
func benchmarkMapAppendAssignStr(b *testing.B, n int) {
k := make([]string, n)
for i := 0; i < len(k); i++ {
k[i] = strconv.Itoa(i)
}
a := make(map[string][]string)
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
key := k[i&(n-1)]
a[key] = append(a[key], key)
}
}
func benchmarkMapDeleteStr(b *testing.B, n int) {
i2s := make([]string, n)
for i := 0; i < n; i++ {
i2s[i] = strconv.Itoa(i)
}
a := make(map[string]int, n)
b.ResetTimer()
k := 0
for i := 0; i < b.N; i++ {
if len(a) == 0 {
b.StopTimer()
for j := 0; j < n; j++ {
a[i2s[j]] = j
}
k = i
b.StartTimer()
}
delete(a, i2s[i-k])
}
}
func benchmarkMapDeletePointer(b *testing.B, n int) {
i2p := make([]*int, n)
for i := 0; i < n; i++ {
i2p[i] = new(int)
}
a := make(map[*int]int, n)
b.ResetTimer()
k := 0
for i := 0; i < b.N; i++ {
if len(a) == 0 {
b.StopTimer()
for j := 0; j < n; j++ {
a[i2p[j]] = j
}
k = i
b.StartTimer()
}
delete(a, i2p[i-k])
}
}
func runWith(f func(*testing.B, int), v ...int) func(*testing.B) {
return func(b *testing.B) {
for _, n := range v {
b.Run(strconv.Itoa(n), func(b *testing.B) { f(b, n) })
}
}
}
func BenchmarkMapAssign(b *testing.B) {
b.Run("Int32", runWith(benchmarkMapAssignInt32, 1<<8, 1<<16))
b.Run("Int64", runWith(benchmarkMapAssignInt64, 1<<8, 1<<16))
b.Run("Str", runWith(benchmarkMapAssignStr, 1<<8, 1<<16))
}
func BenchmarkMapOperatorAssign(b *testing.B) {
b.Run("Int32", runWith(benchmarkMapOperatorAssignInt32, 1<<8, 1<<16))
b.Run("Int64", runWith(benchmarkMapOperatorAssignInt64, 1<<8, 1<<16))
b.Run("Str", runWith(benchmarkMapOperatorAssignStr, 1<<8, 1<<16))
}
func BenchmarkMapAppendAssign(b *testing.B) {
b.Run("Int32", runWith(benchmarkMapAppendAssignInt32, 1<<8, 1<<16))
b.Run("Int64", runWith(benchmarkMapAppendAssignInt64, 1<<8, 1<<16))
b.Run("Str", runWith(benchmarkMapAppendAssignStr, 1<<8, 1<<16))
}
func BenchmarkMapDelete(b *testing.B) {
b.Run("Int32", runWith(benchmarkMapDeleteInt32, 100, 1000, 10000))
b.Run("Int64", runWith(benchmarkMapDeleteInt64, 100, 1000, 10000))
b.Run("Str", runWith(benchmarkMapDeleteStr, 100, 1000, 10000))
b.Run("Pointer", runWith(benchmarkMapDeletePointer, 100, 1000, 10000))
}
func TestDeferDeleteSlow(t *testing.T) {
ks := []complex128{0, 1, 2, 3}
@@ -1422,22 +1060,11 @@ func TestEmptyMapWithInterfaceKey(t *testing.T) {
})
}
func TestLoadFactor(t *testing.T) {
for b := uint8(0); b < 20; b++ {
count := 13 * (1 << b) / 2 // 6.5
if b == 0 {
count = 8
}
if runtime.OverLoadFactor(count, b) {
t.Errorf("OverLoadFactor(%d,%d)=true, want false", count, b)
}
if !runtime.OverLoadFactor(count+1, b) {
t.Errorf("OverLoadFactor(%d,%d)=false, want true", count+1, b)
}
}
}
func TestMapKeys(t *testing.T) {
if goexperiment.SwissMap {
t.Skip("mapkeys not implemented for swissmaps")
}
type key struct {
s string
pad [128]byte // sizeof(key) > abi.MapMaxKeyBytes
@@ -1453,6 +1080,10 @@ func TestMapKeys(t *testing.T) {
}
func TestMapValues(t *testing.T) {
if goexperiment.SwissMap {
t.Skip("mapvalues not implemented for swissmaps")
}
type val struct {
s string
pad [128]byte // sizeof(val) > abi.MapMaxElemBytes
@@ -1544,3 +1175,30 @@ func TestMemHashGlobalSeed(t *testing.T) {
}
})
}
func TestMapIterDeleteReplace(t *testing.T) {
inc := 1
if testing.Short() {
inc = 100
}
for i := 0; i < 10000; i += inc {
t.Run(fmt.Sprint(i), func(t *testing.T) {
m := make(map[int]bool)
for j := range i {
m[j] = false
}
// Delete and replace all entries.
for k := range m {
delete(m, k)
m[k] = true
}
for k, v := range m {
if !v {
t.Errorf("m[%d] got false want true", k)
}
}
})
}
}

View File

@@ -17,6 +17,7 @@ import (
"internal/abi"
"internal/goarch"
"internal/goexperiment"
"internal/runtime/sys"
"unsafe"
)
@@ -91,19 +92,6 @@ import (
// barriers, which will slow down both the mutator and the GC, we always grey
// the ptr object regardless of the slot's color.
//
// Another place where we intentionally omit memory barriers is when
// accessing mheap_.arena_used to check if a pointer points into the
// heap. On relaxed memory machines, it's possible for a mutator to
// extend the size of the heap by updating arena_used, allocate an
// object from this new region, and publish a pointer to that object,
// but for tracing running on another processor to observe the pointer
// but use the old value of arena_used. In this case, tracing will not
// mark the object, even though it's reachable. However, the mutator
// is guaranteed to execute a write barrier when it publishes the
// pointer, so it will take care of marking the object. A general
// consequence of this is that the garbage collector may cache the
// value of mheap_.arena_used. (See issue #9984.)
//
//
// Stack writes:
//
@@ -224,8 +212,8 @@ func wbMove(typ *_type, dst, src unsafe.Pointer) {
//go:linkname reflect_typedmemmove reflect.typedmemmove
func reflect_typedmemmove(typ *_type, dst, src unsafe.Pointer) {
if raceenabled {
raceWriteObjectPC(typ, dst, getcallerpc(), abi.FuncPCABIInternal(reflect_typedmemmove))
raceReadObjectPC(typ, src, getcallerpc(), abi.FuncPCABIInternal(reflect_typedmemmove))
raceWriteObjectPC(typ, dst, sys.GetCallerPC(), abi.FuncPCABIInternal(reflect_typedmemmove))
raceReadObjectPC(typ, src, sys.GetCallerPC(), abi.FuncPCABIInternal(reflect_typedmemmove))
}
if msanenabled {
msanwrite(dst, typ.Size_)
@@ -243,6 +231,11 @@ func reflectlite_typedmemmove(typ *_type, dst, src unsafe.Pointer) {
reflect_typedmemmove(typ, dst, src)
}
//go:linkname maps_typedmemmove internal/runtime/maps.typedmemmove
func maps_typedmemmove(typ *_type, dst, src unsafe.Pointer) {
typedmemmove(typ, dst, src)
}
// reflectcallmove is invoked by reflectcall to copy the return values
// out of the stack and into the heap, invoking the necessary write
// barriers. dst, src, and size describe the return value area to
@@ -294,7 +287,7 @@ func typedslicecopy(typ *_type, dstPtr unsafe.Pointer, dstLen int, srcPtr unsafe
// assignment operations, it's not instrumented in the calling
// code and needs its own instrumentation.
if raceenabled {
callerpc := getcallerpc()
callerpc := sys.GetCallerPC()
pc := abi.FuncPCABIInternal(slicecopy)
racewriterangepc(dstPtr, uintptr(n)*typ.Size_, callerpc, pc)
racereadrangepc(srcPtr, uintptr(n)*typ.Size_, callerpc, pc)
@@ -375,7 +368,7 @@ func typedmemclr(typ *_type, ptr unsafe.Pointer) {
memclrNoHeapPointers(ptr, typ.Size_)
}
// reflect_typedslicecopy is meant for package reflect,
// reflect_typedmemclr is meant for package reflect,
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
// - github.com/ugorji/go/codec
@@ -388,6 +381,11 @@ func reflect_typedmemclr(typ *_type, ptr unsafe.Pointer) {
typedmemclr(typ, ptr)
}
//go:linkname maps_typedmemclr internal/runtime/maps.typedmemclr
func maps_typedmemclr(typ *_type, ptr unsafe.Pointer) {
typedmemclr(typ, ptr)
}
//go:linkname reflect_typedmemclrpartial reflect.typedmemclrpartial
func reflect_typedmemclrpartial(typ *_type, ptr unsafe.Pointer, off, size uintptr) {
if writeBarrier.enabled && typ.Pointers() {

View File

@@ -59,7 +59,7 @@ import (
"internal/abi"
"internal/goarch"
"internal/runtime/atomic"
"runtime/internal/sys"
"internal/runtime/sys"
"unsafe"
)
@@ -197,15 +197,14 @@ func (span *mspan) typePointersOfUnchecked(addr uintptr) typePointers {
return typePointers{}
}
}
gcdata := typ.GCData
return typePointers{elem: addr, addr: addr, mask: readUintptr(gcdata), typ: typ}
gcmask := getGCMask(typ)
return typePointers{elem: addr, addr: addr, mask: readUintptr(gcmask), typ: typ}
}
// typePointersOfType is like typePointersOf, but assumes addr points to one or more
// contiguous instances of the provided type. The provided type must not be nil and
// it must not have its type metadata encoded as a gcprog.
// contiguous instances of the provided type. The provided type must not be nil.
//
// It returns an iterator that tiles typ.GCData starting from addr. It's the caller's
// It returns an iterator that tiles typ's gcmask starting from addr. It's the caller's
// responsibility to limit iteration.
//
// nosplit because its callers are nosplit and require all their callees to be nosplit.
@@ -213,15 +212,15 @@ func (span *mspan) typePointersOfUnchecked(addr uintptr) typePointers {
//go:nosplit
func (span *mspan) typePointersOfType(typ *abi.Type, addr uintptr) typePointers {
const doubleCheck = false
if doubleCheck && (typ == nil || typ.Kind_&abi.KindGCProg != 0) {
if doubleCheck && typ == nil {
throw("bad type passed to typePointersOfType")
}
if span.spanclass.noscan() {
return typePointers{}
}
// Since we have the type, pretend we have a header.
gcdata := typ.GCData
return typePointers{elem: addr, addr: addr, mask: readUintptr(gcdata), typ: typ}
gcmask := getGCMask(typ)
return typePointers{elem: addr, addr: addr, mask: readUintptr(gcmask), typ: typ}
}
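
The typePointers value built here is essentially a 1-bit-per-word mask for one element, tiled across contiguous instances of the type. A hedged, standalone sketch of that lookup (names are made up; the real iterator also handles types larger than 64 words by reading further mask words):

package main

import "fmt"

const ptrSize = 8 // goarch.PtrSize on 64-bit targets

// pointerWord reports whether the pointer-sized word at byte offset off from
// the start of an array of elements of size elemSize, each described by mask
// (bit i set => word i of the element holds a pointer), is a pointer slot.
func pointerWord(mask uint64, elemSize, off uintptr) bool {
	off %= elemSize // position within the element, i.e. addr - elem
	return mask>>(off/ptrSize)&1 != 0
}

func main() {
	// Element layout: struct { p *int; n int; q *int } => mask 0b101, size 24.
	const elemSize = 3 * ptrSize
	mask := uint64(0b101)
	for off := uintptr(0); off < 2*elemSize; off += ptrSize {
		fmt.Printf("word %d: pointer=%v\n", off/ptrSize, pointerWord(mask, elemSize, off))
	}
}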
// nextFast is the fast path of next. nextFast is written to be inlineable and,
@@ -295,7 +294,7 @@ func (tp typePointers) next(limit uintptr) (typePointers, uintptr) {
}
// Grab more bits and try again.
tp.mask = readUintptr(addb(tp.typ.GCData, (tp.addr-tp.elem)/goarch.PtrSize/8))
tp.mask = readUintptr(addb(getGCMask(tp.typ), (tp.addr-tp.elem)/goarch.PtrSize/8))
if tp.addr+goarch.PtrSize*ptrBits > limit {
bits := (tp.addr + goarch.PtrSize*ptrBits - limit) / goarch.PtrSize
tp.mask &^= ((1 << (bits)) - 1) << (ptrBits - bits)
@@ -345,7 +344,7 @@ func (tp typePointers) fastForward(n, limit uintptr) typePointers {
// Move up to the next element.
tp.elem += tp.typ.Size_
tp.addr = tp.elem
tp.mask = readUintptr(tp.typ.GCData)
tp.mask = readUintptr(getGCMask(tp.typ))
// We may have exceeded the limit after this. Bail just like next does.
if tp.addr >= limit {
@@ -354,7 +353,7 @@ func (tp typePointers) fastForward(n, limit uintptr) typePointers {
} else {
// Grab the mask, but then clear any bits before the target address and any
// bits over the limit.
tp.mask = readUintptr(addb(tp.typ.GCData, (tp.addr-tp.elem)/goarch.PtrSize/8))
tp.mask = readUintptr(addb(getGCMask(tp.typ), (tp.addr-tp.elem)/goarch.PtrSize/8))
tp.mask &^= (1 << ((target - tp.addr) / goarch.PtrSize)) - 1
}
if tp.addr+goarch.PtrSize*ptrBits > limit {
@@ -457,7 +456,7 @@ func bulkBarrierPreWrite(dst, src, size uintptr, typ *abi.Type) {
}
var tp typePointers
if typ != nil && typ.Kind_&abi.KindGCProg == 0 {
if typ != nil {
tp = s.typePointersOfType(typ, dst)
} else {
tp = s.typePointersOf(dst, size)
@@ -518,7 +517,7 @@ func bulkBarrierPreWriteSrcOnly(dst, src, size uintptr, typ *abi.Type) {
}
var tp typePointers
if typ != nil && typ.Kind_&abi.KindGCProg == 0 {
if typ != nil {
tp = s.typePointersOfType(typ, dst)
} else {
tp = s.typePointersOf(dst, size)
@@ -535,12 +534,13 @@ func bulkBarrierPreWriteSrcOnly(dst, src, size uintptr, typ *abi.Type) {
}
// initHeapBits initializes the heap bitmap for a span.
//
// TODO(mknyszek): This should set the heap bits for single pointer
// allocations eagerly to avoid calling heapSetType at allocation time,
// just to write one bit.
func (s *mspan) initHeapBits(forceClear bool) {
if (!s.spanclass.noscan() && heapBitsInSpan(s.elemsize)) || s.isUserArenaChunk {
func (s *mspan) initHeapBits() {
if goarch.PtrSize == 8 && !s.spanclass.noscan() && s.spanclass.sizeclass() == 1 {
b := s.heapBits()
for i := range b {
b[i] = ^uintptr(0)
}
} else if (!s.spanclass.noscan() && heapBitsInSpan(s.elemsize)) || s.isUserArenaChunk {
b := s.heapBits()
clear(b)
}
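
The new first branch exploits the fact that a scannable size class 1 span holds only 8-byte objects, each of which is exactly one pointer word, so the span's inline bitmap can simply be filled with all-ones words. The arithmetic behind that inline bitmap, assuming the runtime's usual 8 KiB heap page and 8-byte pointers:

package main

import "fmt"

func main() {
	const pageSize = 8192 // the runtime's heap page size (assumed here)
	const ptrSize = 8     // goarch.PtrSize on 64-bit targets

	words := pageSize / ptrSize // pointer-sized words tracked per page
	bitmapBytes := words / 8    // one bit per word
	fmt.Println(words, "words,", bitmapBytes, "bytes of bitmap at the end of the span")
	// This is the region addressed by span.base()+pageSize-pageSize/goarch.PtrSize/8
	// in writeHeapBitsSmall below: initHeapBits either clears it or, for
	// scannable size class 1, sets every bit.
}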
@@ -640,37 +640,50 @@ func (span *mspan) heapBitsSmallForAddr(addr uintptr) uintptr {
//go:nosplit
func (span *mspan) writeHeapBitsSmall(x, dataSize uintptr, typ *_type) (scanSize uintptr) {
// The objects here are always really small, so a single load is sufficient.
src0 := readUintptr(typ.GCData)
src0 := readUintptr(getGCMask(typ))
// Create repetitions of the bitmap if we have a small array.
bits := span.elemsize / goarch.PtrSize
// Create repetitions of the bitmap if we have a small slice backing store.
scanSize = typ.PtrBytes
src := src0
switch typ.Size_ {
case goarch.PtrSize:
if typ.Size_ == goarch.PtrSize {
src = (1 << (dataSize / goarch.PtrSize)) - 1
default:
} else {
// N.B. We rely on dataSize being an exact multiple of the type size.
// The alternative is to be defensive and mask out src to the length
// of dataSize. The purpose is to save on one additional masking operation.
if doubleCheckHeapSetType && !asanenabled && dataSize%typ.Size_ != 0 {
throw("runtime: (*mspan).writeHeapBitsSmall: dataSize is not a multiple of typ.Size_")
}
for i := typ.Size_; i < dataSize; i += typ.Size_ {
src |= src0 << (i / goarch.PtrSize)
scanSize += typ.Size_
}
if asanenabled {
// Mask src down to dataSize. dataSize is going to be a strange size because of
// the redzone required for allocations when asan is enabled.
src &= (1 << (dataSize / goarch.PtrSize)) - 1
}
}
// Since we're never writing more than one uintptr's worth of bits, we're either going
// to do one or two writes.
dst := span.heapBits()
dst := unsafe.Pointer(span.base() + pageSize - pageSize/goarch.PtrSize/8)
o := (x - span.base()) / goarch.PtrSize
i := o / ptrBits
j := o % ptrBits
bits := span.elemsize / goarch.PtrSize
if j+bits > ptrBits {
// Two writes.
bits0 := ptrBits - j
bits1 := bits - bits0
dst[i+0] = dst[i+0]&(^uintptr(0)>>bits0) | (src << j)
dst[i+1] = dst[i+1]&^((1<<bits1)-1) | (src >> bits0)
dst0 := (*uintptr)(add(dst, (i+0)*goarch.PtrSize))
dst1 := (*uintptr)(add(dst, (i+1)*goarch.PtrSize))
*dst0 = (*dst0)&(^uintptr(0)>>bits0) | (src << j)
*dst1 = (*dst1)&^((1<<bits1)-1) | (src >> bits0)
} else {
// One write.
dst[i] = (dst[i] &^ (((1 << bits) - 1) << j)) | (src << j)
dst := (*uintptr)(add(dst, i*goarch.PtrSize))
*dst = (*dst)&^(((1<<bits)-1)<<j) | (src << j)
}
const doubleCheck = false
@@ -686,97 +699,81 @@ func (span *mspan) writeHeapBitsSmall(x, dataSize uintptr, typ *_type) (scanSize
return
}
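
The one-or-two-write splice at the heart of writeHeapBitsSmall is easier to see in isolation. A standalone sketch of the same read-modify-write logic, assuming 64-bit words (function and variable names are illustrative, not runtime identifiers):

package main

import "fmt"

const ptrBits = 64 // bits per uintptr on 64-bit targets

// spliceBits writes the low `bits` bits of src at bit offset o of a
// word-addressed bitmap, touching either one or two uintptr words.
func spliceBits(dst []uintptr, o, bits uintptr, src uintptr) {
	i, j := o/ptrBits, o%ptrBits
	if j+bits > ptrBits {
		// The run straddles a word boundary: two read-modify-writes.
		bits0 := ptrBits - j
		bits1 := bits - bits0
		dst[i+0] = dst[i+0]&(^uintptr(0)>>bits0) | (src << j)
		dst[i+1] = dst[i+1]&^((1<<bits1)-1) | (src >> bits0)
	} else {
		// Entirely within one word: a single read-modify-write.
		dst[i] = dst[i]&^(((1<<bits)-1)<<j) | (src << j)
	}
}

func main() {
	bitmap := make([]uintptr, 2)
	spliceBits(bitmap, 60, 8, 0b1011_0110) // straddles words 0 and 1
	fmt.Printf("%064b\n%064b\n", bitmap[0], bitmap[1])
}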
// heapSetType records that the new allocation [x, x+size)
// heapSetType* functions record that the new allocation [x, x+size)
// holds in [x, x+dataSize) one or more values of type typ.
// (The number of values is given by dataSize / typ.Size.)
// If dataSize < size, the fragment [x+dataSize, x+size) is
// recorded as non-pointer data.
// It is known that the type has pointers somewhere;
// malloc does not call heapSetType when there are no pointers.
// malloc does not call heapSetType* when there are no pointers.
//
// There can be read-write races between heapSetType and things
// There can be read-write races between heapSetType* and things
// that read the heap metadata like scanobject. However, since
// heapSetType is only used for objects that have not yet been
// heapSetType* is only used for objects that have not yet been
// made reachable, readers will ignore bits being modified by this
// function. This does mean this function cannot transiently modify
// shared memory that belongs to neighboring objects. Also, on weakly-ordered
// machines, callers must execute a store/store (publication) barrier
// between calling this function and making the object reachable.
func heapSetType(x, dataSize uintptr, typ *_type, header **_type, span *mspan) (scanSize uintptr) {
const doubleCheck = false
const doubleCheckHeapSetType = doubleCheckMalloc
func heapSetTypeNoHeader(x, dataSize uintptr, typ *_type, span *mspan) uintptr {
if doubleCheckHeapSetType && (!heapBitsInSpan(dataSize) || !heapBitsInSpan(span.elemsize)) {
throw("tried to write heap bits, but no heap bits in span")
}
scanSize := span.writeHeapBitsSmall(x, dataSize, typ)
if doubleCheckHeapSetType {
doubleCheckHeapType(x, dataSize, typ, nil, span)
}
return scanSize
}
func heapSetTypeSmallHeader(x, dataSize uintptr, typ *_type, header **_type, span *mspan) uintptr {
*header = typ
if doubleCheckHeapSetType {
doubleCheckHeapType(x, dataSize, typ, header, span)
}
return span.elemsize
}
func heapSetTypeLarge(x, dataSize uintptr, typ *_type, span *mspan) uintptr {
gctyp := typ
// Write out the header.
span.largeType = gctyp
if doubleCheckHeapSetType {
doubleCheckHeapType(x, dataSize, typ, &span.largeType, span)
}
return span.elemsize
}
func doubleCheckHeapType(x, dataSize uintptr, gctyp *_type, header **_type, span *mspan) {
doubleCheckHeapPointers(x, dataSize, gctyp, header, span)
// To exercise the less common path more often, generate
// a random interior pointer and make sure iterating from
// that point works correctly too.
maxIterBytes := span.elemsize
if header == nil {
if doubleCheck && (!heapBitsInSpan(dataSize) || !heapBitsInSpan(span.elemsize)) {
throw("tried to write heap bits, but no heap bits in span")
}
// Handle the case where we have no malloc header.
scanSize = span.writeHeapBitsSmall(x, dataSize, typ)
} else {
if typ.Kind_&abi.KindGCProg != 0 {
// Allocate space to unroll the gcprog. This space will consist of
// a dummy _type value and the unrolled gcprog. The dummy _type will
// refer to the bitmap, and the mspan will refer to the dummy _type.
if span.spanclass.sizeclass() != 0 {
throw("GCProg for type that isn't large")
}
spaceNeeded := alignUp(unsafe.Sizeof(_type{}), goarch.PtrSize)
heapBitsOff := spaceNeeded
spaceNeeded += alignUp(typ.PtrBytes/goarch.PtrSize/8, goarch.PtrSize)
npages := alignUp(spaceNeeded, pageSize) / pageSize
var progSpan *mspan
systemstack(func() {
progSpan = mheap_.allocManual(npages, spanAllocPtrScalarBits)
memclrNoHeapPointers(unsafe.Pointer(progSpan.base()), progSpan.npages*pageSize)
})
// Write a dummy _type in the new space.
//
// We only need to write size, PtrBytes, and GCData, since that's all
// the GC cares about.
gctyp = (*_type)(unsafe.Pointer(progSpan.base()))
gctyp.Size_ = typ.Size_
gctyp.PtrBytes = typ.PtrBytes
gctyp.GCData = (*byte)(add(unsafe.Pointer(progSpan.base()), heapBitsOff))
gctyp.TFlag = abi.TFlagUnrolledBitmap
// Expand the GC program into space reserved at the end of the new span.
runGCProg(addb(typ.GCData, 4), gctyp.GCData)
}
// Write out the header.
*header = gctyp
scanSize = span.elemsize
maxIterBytes = dataSize
}
if doubleCheck {
doubleCheckHeapPointers(x, dataSize, gctyp, header, span)
// To exercise the less common path more often, generate
// a random interior pointer and make sure iterating from
// that point works correctly too.
maxIterBytes := span.elemsize
if header == nil {
maxIterBytes = dataSize
}
off := alignUp(uintptr(cheaprand())%dataSize, goarch.PtrSize)
size := dataSize - off
if size == 0 {
off -= goarch.PtrSize
size += goarch.PtrSize
}
interior := x + off
size -= alignDown(uintptr(cheaprand())%size, goarch.PtrSize)
if size == 0 {
size = goarch.PtrSize
}
// Round up the type to the size of the type.
size = (size + gctyp.Size_ - 1) / gctyp.Size_ * gctyp.Size_
if interior+size > x+maxIterBytes {
size = x + maxIterBytes - interior
}
doubleCheckHeapPointersInterior(x, interior, size, dataSize, gctyp, header, span)
off := alignUp(uintptr(cheaprand())%dataSize, goarch.PtrSize)
size := dataSize - off
if size == 0 {
off -= goarch.PtrSize
size += goarch.PtrSize
}
return
interior := x + off
size -= alignDown(uintptr(cheaprand())%size, goarch.PtrSize)
if size == 0 {
size = goarch.PtrSize
}
// Round up the type to the size of the type.
size = (size + gctyp.Size_ - 1) / gctyp.Size_ * gctyp.Size_
if interior+size > x+maxIterBytes {
size = x + maxIterBytes - interior
}
doubleCheckHeapPointersInterior(x, interior, size, dataSize, gctyp, header, span)
}
func doubleCheckHeapPointers(x, dataSize uintptr, typ *_type, header **_type, span *mspan) {
@@ -794,7 +791,7 @@ func doubleCheckHeapPointers(x, dataSize uintptr, typ *_type, header **_type, sp
off := i % typ.Size_
if off < typ.PtrBytes {
j := off / goarch.PtrSize
want = *addb(typ.GCData, j/8)>>(j%8)&1 != 0
want = *addb(getGCMask(typ), j/8)>>(j%8)&1 != 0
}
}
if want {
@@ -817,7 +814,7 @@ func doubleCheckHeapPointers(x, dataSize uintptr, typ *_type, header **_type, sp
}
println("runtime: extra pointer:", hex(addr))
}
print("runtime: hasHeader=", header != nil, " typ.Size_=", typ.Size_, " hasGCProg=", typ.Kind_&abi.KindGCProg != 0, "\n")
print("runtime: hasHeader=", header != nil, " typ.Size_=", typ.Size_, " TFlagGCMaskOnDemaind=", typ.TFlag&abi.TFlagGCMaskOnDemand != 0, "\n")
print("runtime: x=", hex(x), " dataSize=", dataSize, " elemsize=", span.elemsize, "\n")
print("runtime: typ=", unsafe.Pointer(typ), " typ.PtrBytes=", typ.PtrBytes, "\n")
print("runtime: limit=", hex(x+span.elemsize), "\n")
@@ -851,7 +848,7 @@ func doubleCheckHeapPointersInterior(x, interior, size, dataSize uintptr, typ *_
off := i % typ.Size_
if off < typ.PtrBytes {
j := off / goarch.PtrSize
want = *addb(typ.GCData, j/8)>>(j%8)&1 != 0
want = *addb(getGCMask(typ), j/8)>>(j%8)&1 != 0
}
}
if want {
@@ -899,7 +896,7 @@ func doubleCheckHeapPointersInterior(x, interior, size, dataSize uintptr, typ *_
off := i % typ.Size_
if off < typ.PtrBytes {
j := off / goarch.PtrSize
want = *addb(typ.GCData, j/8)>>(j%8)&1 != 0
want = *addb(getGCMask(typ), j/8)>>(j%8)&1 != 0
}
}
if want {
@@ -915,7 +912,7 @@ func doubleCheckHeapPointersInterior(x, interior, size, dataSize uintptr, typ *_
//go:nosplit
func doubleCheckTypePointersOfType(s *mspan, typ *_type, addr, size uintptr) {
if typ == nil || typ.Kind_&abi.KindGCProg != 0 {
if typ == nil {
return
}
if typ.Kind_&abi.KindMask == abi.Interface {
@@ -1365,9 +1362,6 @@ func bulkBarrierBitmap(dst, src, size, maskOffset uintptr, bits *uint8) {
//
// The type typ must correspond exactly to [src, src+size) and [dst, dst+size).
// dst, src, and size must be pointer-aligned.
// The type typ must have a plain bitmap, not a GC program.
// The only use of this function is in channel sends, and the
// 64 kB channel element limit takes care of this for us.
//
// Must not be preempted because it typically runs right before memmove,
// and the GC must observe them as an atomic action.
@@ -1383,14 +1377,10 @@ func typeBitsBulkBarrier(typ *_type, dst, src, size uintptr) {
println("runtime: typeBitsBulkBarrier with type ", toRType(typ).string(), " of size ", typ.Size_, " but memory size", size)
throw("runtime: invalid typeBitsBulkBarrier")
}
if typ.Kind_&abi.KindGCProg != 0 {
println("runtime: typeBitsBulkBarrier with type ", toRType(typ).string(), " with GC prog")
throw("runtime: invalid typeBitsBulkBarrier")
}
if !writeBarrier.enabled {
return
}
ptrmask := typ.GCData
ptrmask := getGCMask(typ)
buf := &getg().m.p.ptr().wbBuf
var bits uint32
for i := uintptr(0); i < typ.PtrBytes; i += goarch.PtrSize {
@@ -1475,6 +1465,9 @@ func progToPointerMask(prog *byte, size uintptr) bitvector {
// 0nnnnnnn: emit n bits copied from the next (n+7)/8 bytes
// 10000000 n c: repeat the previous n bits c times; n, c are varints
// 1nnnnnnn c: repeat the previous n bits c times; c is a varint
//
// Currently, gc programs are only used for describing data and bss
// sections of the binary.
// runGCProg returns the number of 1-bit entries written to memory.
func runGCProg(prog, dst *byte) uintptr {
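
The gcprog encoding described above is compact but easy to mis-read. The following standalone decoder sketch, separate from runGCProg, expands a tiny program into one byte per bit; it assumes the usual conventions that a zero instruction byte terminates the program, literal bits are packed LSB-first, and counts are little-endian base-128 varints:

package main

import "fmt"

func uvarint(p []byte) (uint64, int) {
	var x uint64
	var shift uint
	for i, b := range p {
		x |= uint64(b&0x7f) << shift
		if b&0x80 == 0 {
			return x, i + 1
		}
		shift += 7
	}
	panic("truncated varint")
}

func decode(prog []byte) []byte {
	var bits []byte
	for len(prog) > 0 {
		inst := prog[0]
		prog = prog[1:]
		n := uint64(inst & 0x7f)
		if inst&0x80 == 0 {
			if n == 0 {
				break // end of program
			}
			// Literal: copy n bits from the next (n+7)/8 bytes, LSB first.
			for i := uint64(0); i < n; i++ {
				bits = append(bits, prog[i/8]>>(i%8)&1)
			}
			prog = prog[(n+7)/8:]
			continue
		}
		if n == 0 {
			// 10000000 n c: both counts are varints.
			var k int
			n, k = uvarint(prog)
			prog = prog[k:]
		}
		c, k := uvarint(prog)
		prog = prog[k:]
		// Repeat the previous n bits c more times.
		tail := bits[uint64(len(bits))-n:]
		for ; c > 0; c-- {
			bits = append(bits, tail...)
		}
	}
	return bits
}

func main() {
	// Literal 3 bits 101, then repeat the previous 3 bits twice, then end.
	prog := []byte{0x03, 0b101, 0x83, 0x02, 0x00}
	fmt.Println(decode(prog)) // [1 0 1 1 0 1 1 0 1]
}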
@@ -1671,24 +1664,6 @@ Run:
return totalBits
}
// materializeGCProg allocates space for the (1-bit) pointer bitmask
// for an object of size ptrdata. Then it fills that space with the
// pointer bitmask specified by the program prog.
// The bitmask starts at s.startAddr.
// The result must be deallocated with dematerializeGCProg.
func materializeGCProg(ptrdata uintptr, prog *byte) *mspan {
// Each word of ptrdata needs one bit in the bitmap.
bitmapBytes := divRoundUp(ptrdata, 8*goarch.PtrSize)
// Compute the number of pages needed for bitmapBytes.
pages := divRoundUp(bitmapBytes, pageSize)
s := mheap_.allocManual(pages, spanAllocPtrScalarBits)
runGCProg(addb(prog, 4), (*byte)(unsafe.Pointer(s.startAddr)))
return s
}
func dematerializeGCProg(s *mspan) {
mheap_.freeManual(s, spanAllocPtrScalarBits)
}
func dumpGCProg(p *byte) {
nptr := 0
for {
@@ -1741,13 +1716,13 @@ func dumpGCProg(p *byte) {
//
//go:linkname reflect_gcbits reflect.gcbits
func reflect_gcbits(x any) []byte {
return getgcmask(x)
return pointerMask(x)
}
// Returns GC type info for the pointer stored in ep for testing.
// If ep points to the stack, only static live information will be returned
// (i.e. not for objects which are only dynamically live stack objects).
func getgcmask(ep any) (mask []byte) {
func pointerMask(ep any) (mask []byte) {
e := *efaceOf(&ep)
p := e.data
t := e._type
@@ -1823,50 +1798,48 @@ func getgcmask(ep any) (mask []byte) {
maskFromHeap = maskFromHeap[:len(maskFromHeap)-1]
}
if et.Kind_&abi.KindGCProg == 0 {
// Unroll again, but this time from the type information.
maskFromType := make([]byte, (limit-base)/goarch.PtrSize)
tp = s.typePointersOfType(et, base)
for {
var addr uintptr
if tp, addr = tp.next(limit); addr == 0 {
break
}
maskFromType[(addr-base)/goarch.PtrSize] = 1
// Unroll again, but this time from the type information.
maskFromType := make([]byte, (limit-base)/goarch.PtrSize)
tp = s.typePointersOfType(et, base)
for {
var addr uintptr
if tp, addr = tp.next(limit); addr == 0 {
break
}
maskFromType[(addr-base)/goarch.PtrSize] = 1
}
// Validate that the prefix of maskFromType is equal to
// maskFromHeap. maskFromType may contain more pointers than
// maskFromHeap produces because maskFromHeap may be able to
// get exact type information for certain classes of objects.
// With maskFromType, we're always just tiling the type bitmap
// through to the elemsize.
//
// It's OK if maskFromType has pointers in elemsize that extend
// past the actual populated space; we checked above that all
// that space is zeroed, so the GC will just see nil pointers.
differs := false
for i := range maskFromHeap {
if maskFromHeap[i] != maskFromType[i] {
differs = true
break
}
// Validate that the prefix of maskFromType is equal to
// maskFromHeap. maskFromType may contain more pointers than
// maskFromHeap produces because maskFromHeap may be able to
// get exact type information for certain classes of objects.
// With maskFromType, we're always just tiling the type bitmap
// through to the elemsize.
//
// It's OK if maskFromType has pointers in elemsize that extend
// past the actual populated space; we checked above that all
// that space is zeroed, so the GC will just see nil pointers.
differs := false
for i := range maskFromHeap {
if maskFromHeap[i] != maskFromType[i] {
differs = true
break
}
}
if differs {
print("runtime: heap mask=")
for _, b := range maskFromHeap {
print(b)
}
println()
print("runtime: type mask=")
for _, b := range maskFromType {
print(b)
}
println()
print("runtime: type=", toRType(et).string(), "\n")
throw("found two different masks from two different methods")
if differs {
print("runtime: heap mask=")
for _, b := range maskFromHeap {
print(b)
}
println()
print("runtime: type mask=")
for _, b := range maskFromType {
print(b)
}
println()
print("runtime: type=", toRType(et).string(), "\n")
throw("found two different masks from two different methods")
}
// Select the heap mask to return. We may not have a type mask.
