Initial commit: Go 1.23 release state

This commit is contained in:
Vorapol Rinsatitnon
2024-09-21 23:49:08 +10:00
commit 17cd57a668
13231 changed files with 3114330 additions and 0 deletions

102
src/internal/abi/abi.go Normal file
View File

@@ -0,0 +1,102 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package abi
import (
"internal/goarch"
"unsafe"
)
// RegArgs is a struct that has space for each argument
// and return value register on the current architecture.
//
// Assembly code knows the layout of the first two fields
// of RegArgs.
//
// RegArgs also contains additional space to hold pointers
// when it may not be safe to keep them only in the integer
// register space otherwise.
// RegArgs is a struct that has space for each argument
// and return value register on the current architecture.
//
// Assembly code knows the layout of the first two fields
// of RegArgs.
//
// RegArgs also contains additional space to hold pointers
// when it may not be safe to keep them only in the integer
// register space otherwise.
type RegArgs struct {
	// Values in these slots should be precisely the bit-by-bit
	// representation of how they would appear in a register.
	//
	// This means that on big endian arches, integer values should
	// be in the top bits of the slot. Floats are usually just
	// directly represented, but some architectures treat narrow
	// width floating point values specially (e.g. they're promoted
	// first, or they need to be NaN-boxed).
	Ints   [IntArgRegs]uintptr  // untyped integer registers
	Floats [FloatArgRegs]uint64 // untyped float registers

	// Fields above this point are known to assembly.
	// Do not reorder or insert fields before this line.

	// Ptrs is a space that duplicates Ints but with pointer type,
	// used to make pointers passed or returned in registers
	// visible to the GC by making the type unsafe.Pointer.
	Ptrs [IntArgRegs]unsafe.Pointer

	// ReturnIsPtr is a bitmap that indicates which registers
	// contain or will contain pointers on the return path from
	// a reflectcall. The i'th bit indicates whether the i'th
	// register contains or will contain a valid Go pointer.
	ReturnIsPtr IntArgRegBitmap
}
// Dump prints the raw contents of the register slots (integers,
// floats, then pointers) via the print/println builtins, as a
// debugging aid.
func (r *RegArgs) Dump() {
	print("Ints:")
	for i := range r.Ints {
		print(" ", r.Ints[i])
	}
	println()
	print("Floats:")
	for i := range r.Floats {
		print(" ", r.Floats[i])
	}
	println()
	print("Ptrs:")
	for i := range r.Ptrs {
		print(" ", r.Ptrs[i])
	}
	println()
}
// IntRegArgAddr returns a pointer inside of r.Ints[reg] that is appropriately
// offset for an argument of size argSize.
//
// argSize must be non-zero, fit in a register, and a power-of-two.
//
// This method is a helper for dealing with the endianness of different CPU
// architectures, since sub-word-sized arguments in big endian architectures
// need to be "aligned" to the upper edge of the register to be interpreted
// by the CPU correctly.
func (r *RegArgs) IntRegArgAddr(reg int, argSize uintptr) unsafe.Pointer {
	if argSize > goarch.PtrSize || argSize == 0 || argSize&(argSize-1) != 0 {
		panic("invalid argSize")
	}
	offset := uintptr(0)
	if goarch.BigEndian {
		// Sub-word values live in the high-order bytes of the slot,
		// so point past the unused low-order bytes.
		offset = goarch.PtrSize - argSize
	}
	// unsafe.Add keeps the arithmetic in the unsafe.Pointer domain,
	// which is the idiomatic (and checkptr-friendlier) form of the
	// unsafe.Pointer(uintptr(p) + offset) pattern.
	return unsafe.Add(unsafe.Pointer(&r.Ints[reg]), offset)
}
// IntArgRegBitmap is a bitmap large enough to hold one bit per
// integer argument/return register.
type IntArgRegBitmap [(IntArgRegs + 7) / 8]uint8

// Set sets the i'th bit of the bitmap to 1.
func (b *IntArgRegBitmap) Set(i int) {
	byteIdx, bitIdx := i/8, i%8
	b[byteIdx] |= 1 << bitIdx
}

// Get returns whether the i'th bit of the bitmap is set.
//
// nosplit because it's called in extremely sensitive contexts, like
// on the reflectcall return path.
//
//go:nosplit
func (b *IntArgRegBitmap) Get(i int) bool {
	byteIdx, bitIdx := i/8, i%8
	return b[byteIdx]&(1<<bitIdx) != 0
}

View File

@@ -0,0 +1,18 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package abi
const (
	// See abi_generic.go for the meaning of these constants.

	// The 9 integer registers used for arguments and results:
	// RAX, RBX, RCX, RDI, RSI, R8, R9, R10, R11.
	IntArgRegs = 9

	// X0 -> X14.
	FloatArgRegs = 15

	// We use SSE2 registers which support 64-bit float operations.
	EffectiveFloatRegSize = 8
)

View File

@@ -0,0 +1,17 @@
// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package abi
const (
	// See abi_generic.go for the meaning of these constants.

	// R0 - R15.
	IntArgRegs = 16

	// F0 - F15.
	FloatArgRegs = 16

	EffectiveFloatRegSize = 8
)

View File

@@ -0,0 +1,38 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build !goexperiment.regabiargs && !amd64 && !arm64 && !loong64 && !ppc64 && !ppc64le && !riscv64
package abi
const (
	// ABI-related constants.
	//
	// In the generic case, these are all zero,
	// which lets them gracefully degrade to ABI0.
	// Architecture-specific files in this package override
	// these values when the register ABI is in use.

	// IntArgRegs is the number of registers dedicated
	// to passing integer argument values. Result registers are identical
	// to argument registers, so this number is used for those too.
	IntArgRegs = 0

	// FloatArgRegs is the number of registers dedicated
	// to passing floating-point argument values. Result registers are
	// identical to argument registers, so this number is used for
	// those too.
	FloatArgRegs = 0

	// EffectiveFloatRegSize describes the width of floating point
	// registers on the current platform from the ABI's perspective.
	//
	// Since Go only supports 32-bit and 64-bit floating point primitives,
	// this number should be either 0, 4, or 8. 0 indicates no floating
	// point registers for the ABI or that floating point values will be
	// passed via the softfloat ABI.
	//
	// For platforms that support larger floating point register widths,
	// such as x87's 80-bit "registers" (not that we support x87 currently),
	// use 8.
	EffectiveFloatRegSize = 0
)

View File

@@ -0,0 +1,17 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package abi
const (
	// See abi_generic.go for the meaning of these constants.

	// R4 - R19.
	IntArgRegs = 16

	// F0 - F15.
	FloatArgRegs = 16

	EffectiveFloatRegSize = 8
)

View File

@@ -0,0 +1,19 @@
// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build ppc64 || ppc64le
package abi
const (
	// See abi_generic.go for the meaning of these constants.

	// R3 - R10, R14 - R17 (8 + 4 registers).
	IntArgRegs = 12

	// F1 - F12.
	FloatArgRegs = 12

	EffectiveFloatRegSize = 8
)

View File

@@ -0,0 +1,17 @@
// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package abi
const (
	// See abi_generic.go for the meaning of these constants.

	// X8 - X23.
	IntArgRegs = 16

	// F8 - F23.
	FloatArgRegs = 16

	EffectiveFloatRegSize = 8
)

View File

@@ -0,0 +1,79 @@
// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package abi_test
import (
"internal/abi"
"internal/testenv"
"path/filepath"
"strings"
"testing"
)
// TestFuncPC checks that the FuncPC* intrinsics report the correct
// entry PC, using the address of an assembly function recorded at
// link time as the reference value.
func TestFuncPC(t *testing.T) {
	wantPC := abi.FuncPCTestFnAddr

	// Locally defined function, via the noinline test helper.
	if gotPC := abi.FuncPCTest(); gotPC != wantPC {
		t.Errorf("FuncPC returns wrong PC, want %x, got %x", wantPC, gotPC)
	}

	// Imported function, referenced directly.
	if gotPC := abi.FuncPCABI0(abi.FuncPCTestFn); gotPC != wantPC {
		t.Errorf("FuncPC returns wrong PC, want %x, got %x", wantPC, gotPC)
	}
}
// TestFuncPCCompileError verifies that FuncPC* applied to a function of
// a mismatched ABI is rejected at compile time, by assembling and
// compiling the testdata package by hand and checking the reported
// error lines.
func TestFuncPCCompileError(t *testing.T) {
	// Test that FuncPC* on a function of a mismatched ABI is rejected.
	testenv.MustHaveGoBuild(t)

	// We want to test internal package, which we cannot normally import.
	// Run the assembler and compiler manually.
	tmpdir := t.TempDir()
	asmSrc := filepath.Join("testdata", "x.s")
	goSrc := filepath.Join("testdata", "x.go")
	symabi := filepath.Join(tmpdir, "symabi")
	obj := filepath.Join(tmpdir, "x.o")

	// Write an importcfg file for the dependencies of the package.
	importcfgfile := filepath.Join(tmpdir, "hello.importcfg")
	testenv.WriteImportcfg(t, importcfgfile, nil, "internal/abi")

	// parse assembly code for symabi.
	cmd := testenv.Command(t, testenv.GoToolPath(t), "tool", "asm", "-p=p", "-gensymabis", "-o", symabi, asmSrc)
	out, err := cmd.CombinedOutput()
	if err != nil {
		t.Fatalf("go tool asm -gensymabis failed: %v\n%s", err, out)
	}

	// compile go code.
	cmd = testenv.Command(t, testenv.GoToolPath(t), "tool", "compile", "-importcfg="+importcfgfile, "-p=p", "-symabis", symabi, "-o", obj, goSrc)
	out, err = cmd.CombinedOutput()
	if err == nil {
		t.Fatalf("go tool compile did not fail")
	}

	// Expect errors in line 17, 18, 20, no errors on other lines.
	want := []string{"x.go:17", "x.go:18", "x.go:20"}
	got := strings.Split(string(out), "\n")
	if got[len(got)-1] == "" {
		got = got[:len(got)-1] // remove last empty line
	}
	for i, s := range got {
		// Bound the index by len(want): if the compiler reports more
		// errors than expected, indexing want[i] unconditionally would
		// panic with an index out of range instead of failing the test.
		if i >= len(want) {
			t.Errorf("unexpected extra error: %q", s)
			continue
		}
		if !strings.Contains(s, want[i]) {
			t.Errorf("did not error on line %s", want[i])
		}
	}
	if len(got) != len(want) {
		t.Errorf("unexpected number of errors, want %d, got %d", len(want), len(got))
	}
	if t.Failed() {
		t.Logf("output:\n%s", string(out))
	}
}

View File

@@ -0,0 +1,27 @@
// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "textflag.h"

// PTRSIZE is the pointer size in bytes for the target architecture:
// 4 on the 32-bit targets enumerated below, 8 everywhere else.
#ifdef GOARCH_386
#define PTRSIZE 4
#endif
#ifdef GOARCH_arm
#define PTRSIZE 4
#endif
#ifdef GOARCH_mips
#define PTRSIZE 4
#endif
#ifdef GOARCH_mipsle
#define PTRSIZE 4
#endif
#ifndef PTRSIZE
#define PTRSIZE 8
#endif

// FuncPCTestFn is an empty ABI0 function used by the FuncPC tests.
TEXT internalabi·FuncPCTestFn(SB),NOSPLIT,$0-0
	RET

// FuncPCTestFnAddr records the address of FuncPCTestFn at link time,
// so Go test code can compare FuncPC* results against it.
GLOBL internalabi·FuncPCTestFnAddr(SB), NOPTR, $PTRSIZE
DATA internalabi·FuncPCTestFnAddr(SB)/PTRSIZE, $internalabi·FuncPCTestFn(SB)

View File

@@ -0,0 +1,28 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package abi
// These functions are the build-time version of the Go type data structures.
// Their contents must be kept in sync with their definitions.
// Because the host and target type sizes can differ, the compiler and
// linker cannot use the host information that they might get from
// either unsafe.Sizeof and Alignof, nor runtime, reflect, or reflectlite.
// CommonSize returns sizeof(Type) for a compilation target with a given
// ptrSize: four pointer-sized fields (Size_, PtrBytes, Equal, GCData)
// plus two fixed 8-byte groups (Hash through Kind_, and Str/PtrToThis).
func CommonSize(ptrSize int) int {
	const fixedBytes = 8 + 8
	return 4*ptrSize + fixedBytes
}
// StructFieldSize returns sizeof(StructField) for a compilation target
// with a given ptrSize.
func StructFieldSize(ptrSize int) int {
	// A StructField occupies three pointer-sized words — keep in sync
	// with the StructField definition.
	const fieldWords = 3
	return fieldWords * ptrSize
}
// UncommonSize returns sizeof(UncommonType). This currently does not depend on ptrSize.
// This exported function is in an internal package, so it may change to depend on ptrSize in the future.
func UncommonSize() uint64 {
	// PkgPath(4) + Mcount(2) + Xcount(2) + Moff(4) + unused trailing uint32(4).
	return 4 + 2 + 2 + 4 + 4
}
// TFlagOff returns the offset of Type.TFlag for a compilation target with
// a given ptrSize: TFlag follows the two pointer-sized fields Size_ and
// PtrBytes and the 4-byte Hash.
func TFlagOff(ptrSize int) int {
	return 2*ptrSize + 4
}
// ITabTypeOff returns the offset of ITab.Type for a compilation target
// with a given ptrSize: Type is the second field of ITab, directly after
// the pointer-sized Inter field.
func ITabTypeOff(ptrSize int) int {
	return ptrSize
}

View File

@@ -0,0 +1,33 @@
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package abi
import "unsafe"
// NoEscape hides the pointer p from escape analysis, preventing it
// from escaping to the heap. It compiles down to nothing.
//
// WARNING: This is very subtle to use correctly. The caller must
// ensure that it's truly safe for p to not escape to the heap by
// maintaining runtime pointer invariants (for example, that globals
// and the heap may not generally point into a stack).
//
//go:nosplit
//go:nocheckptr
func NoEscape(p unsafe.Pointer) unsafe.Pointer {
	x := uintptr(p)
	// The xor with 0 is a no-op at runtime, but it severs the direct
	// data flow from p to the result that escape analysis tracks.
	// Do not "simplify" this expression.
	return unsafe.Pointer(x ^ 0)
}
// alwaysFalse is never set; it exists only so the compiler cannot
// discard the assignment to escapeSink in Escape below.
var alwaysFalse bool

// escapeSink is written to (behind alwaysFalse) and never read.
var escapeSink any

// Escape forces any pointers in x to escape to the heap.
func Escape[T any](x T) T {
	if alwaysFalse {
		// Never executed, but the mere possibility of this assignment
		// to a package-level variable makes x escape.
		escapeSink = x
	}
	return x
}

View File

@@ -0,0 +1,14 @@
// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package abi
// FuncPCTestFn is defined in assembly; its body only returns. It exists
// so tests can compare FuncPC* results against a linker-recorded address.
func FuncPCTestFn()

var FuncPCTestFnAddr uintptr // address of FuncPCTestFn, directly retrieved from assembly

// FuncPCTest returns the entry PC of FuncPCTestFn via the FuncPCABI0
// intrinsic; go:noinline keeps it as a standalone function for the test.
//
//go:noinline
func FuncPCTest() uintptr {
	return FuncPCABI0(FuncPCTestFn)
}

View File

@@ -0,0 +1,31 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build !gccgo
package abi
// FuncPC* intrinsics.
//
// CAREFUL: In programs with plugins, FuncPC* can return different values
// for the same function (because there are actually multiple copies of
// the same function in the address space). To be safe, don't use the
// results of this function in any == expression. It is only safe to
// use the result as an address at which to start executing code.
//
// The declarations below are body-less; the compiler supplies the
// implementations. See funcpc_gccgo.go for the gccgo bootstrap fallback.

// FuncPCABI0 returns the entry PC of the function f, which must be a
// direct reference of a function defined as ABI0. Otherwise it is a
// compile-time error.
//
// Implemented as a compile intrinsic.
func FuncPCABI0(f interface{}) uintptr

// FuncPCABIInternal returns the entry PC of the function f. If f is a
// direct reference of a function, it must be defined as ABIInternal.
// Otherwise it is a compile-time error. If f is not a direct reference
// of a defined function, it assumes that f is a func value. Otherwise
// the behavior is undefined.
//
// Implemented as a compile intrinsic.
func FuncPCABIInternal(f interface{}) uintptr

View File

@@ -0,0 +1,21 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// For bootstrapping with gccgo.
//go:build gccgo
package abi
import "unsafe"
// FuncPCABI0 is the pure-Go bootstrap fallback for the FuncPCABI0
// intrinsic: it reinterprets the interface as two words and
// dereferences the second (data) word to obtain the code address.
func FuncPCABI0(f interface{}) uintptr {
	words := (*[2]unsafe.Pointer)(unsafe.Pointer(&f))
	return *(*uintptr)(unsafe.Pointer(words[1]))
}

// FuncPCABIInternal is the bootstrap fallback for the FuncPCABIInternal
// intrinsic; under gccgo it is implemented identically to FuncPCABI0.
func FuncPCABIInternal(f interface{}) uintptr {
	words := (*[2]unsafe.Pointer)(unsafe.Pointer(&f))
	return *(*uintptr)(unsafe.Pointer(words[1]))
}

27
src/internal/abi/iface.go Normal file
View File

@@ -0,0 +1,27 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package abi
import "unsafe"
// The first word of every non-empty interface type contains an *ITab.
// It records the underlying concrete type (Type), the interface type it
// is implementing (Inter), and some ancillary information.
//
// allocated in non-garbage-collected memory
type ITab struct {
	Inter *InterfaceType
	Type  *Type
	Hash  uint32 // copy of Type.Hash. Used for type switches.
	// Fun is declared with length 1 but is variable sized in practice.
	// fun[0]==0 means Type does not implement Inter.
	Fun [1]uintptr
}

// EmptyInterface describes the layout of an "interface{}" or an "any":
// these are represented differently than non-empty interfaces, as the
// first word always points to an abi.Type rather than an *ITab.
type EmptyInterface struct {
	Type *Type
	Data unsafe.Pointer
}

19
src/internal/abi/map.go Normal file
View File

@@ -0,0 +1,19 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package abi
// Map constants common to several packages.
// runtime/runtime-gdb.py:MapTypePrinter contains its own copy.
const (
	// Maximum number of key/elem pairs a bucket can hold.
	MapBucketCountBits = 3                        // log2 of number of elements in a bucket.
	MapBucketCount     = 1 << MapBucketCountBits // 8 pairs per bucket.

	// Maximum key or elem size to keep inline (instead of mallocing per element).
	// Must fit in a uint8.
	// Note: fast map functions cannot handle big elems (bigger than MapMaxElemBytes).
	MapMaxKeyBytes  = 128
	MapMaxElemBytes = 128 // Must fit in a uint8.
)

View File

@@ -0,0 +1,18 @@
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package abi
// RF_State describes the state of a range-over-func loop body.
type RF_State int

// These constants are shared between the compiler, which uses them for state functions
// and panic indicators, and the runtime, which turns them into more meaningful strings.
// For best code generation, RF_DONE and RF_READY should be 0 and 1.
const (
	RF_DONE          = RF_State(iota) // body of loop has exited in a non-panic way
	RF_READY                          // body of loop has not exited yet, is not running -- this is not a panic index
	RF_PANIC                          // body of loop is either currently running, or has panicked
	RF_EXHAUSTED                      // iterator function return, i.e., sequence is "exhausted"
	// RF_MISSING_PANIC is spelled as an explicit 4, continuing the iota
	// sequence above (RF_EXHAUSTED is 3).
	RF_MISSING_PANIC = 4 // body of loop panicked but iterator function defer-recovered it away
)

View File

@@ -0,0 +1,8 @@
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package abi
// ZeroValSize is the size in bytes of runtime.zeroVal.
// Keep in sync with the zeroVal declaration in the runtime.
const ZeroValSize = 1024

33
src/internal/abi/stack.go Normal file
View File

@@ -0,0 +1,33 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package abi
const (
	// StackNosplitBase is the base maximum number of bytes that a chain of
	// NOSPLIT functions can use.
	//
	// This value must be multiplied by the stack guard multiplier, so do not
	// use it directly. See runtime/stack.go:stackNosplit and
	// cmd/internal/objabi/stack.go:StackNosplit.
	StackNosplitBase = 800

	// We have three different sequences for stack bounds checks, depending on
	// whether the stack frame of a function is small, big, or huge.

	// After a stack split check the SP is allowed to be StackSmall bytes below
	// the stack guard.
	//
	// Functions that need frames <= StackSmall can perform the stack check
	// using a single comparison directly between the stack guard and the SP
	// because we ensure that StackSmall bytes of stack space are available
	// beyond the stack guard.
	StackSmall = 128

	// Functions that need frames <= StackBig can assume that neither
	// SP-framesize nor stackGuard-StackSmall will underflow, and thus use a
	// more efficient check. In order to ensure this, StackBig must be <= the
	// size of the unmapped space at zero.
	StackBig = 4096
)

7
src/internal/abi/stub.s Normal file
View File

@@ -0,0 +1,7 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// This file silences errors about body-less functions
// that are provided by intrinsics in the latest version of the compiler,
// but may not be known to the bootstrap compiler.

View File

@@ -0,0 +1,61 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package abi
// InterfaceSwitch is the static descriptor for a compiled interface
// switch, pairing its cases with a runtime lookup cache.
type InterfaceSwitch struct {
	Cache  *InterfaceSwitchCache
	NCases int

	// Array of NCases elements.
	// Each case must be a non-empty interface type.
	// (Declared with length 1; variable sized in practice.)
	Cases [1]*InterfaceType
}

// InterfaceSwitchCache caches interface-switch dispatch results,
// indexed by a masked hash of the source type.
type InterfaceSwitchCache struct {
	Mask    uintptr                      // mask for index. Must be a power of 2 minus 1
	Entries [1]InterfaceSwitchCacheEntry // Mask+1 entries total
}

// InterfaceSwitchCacheEntry is one cached dispatch result.
type InterfaceSwitchCacheEntry struct {
	// type of source value (a *Type)
	Typ uintptr
	// case # to dispatch to
	Case int
	// itab to use for resulting case variable (a *runtime.itab)
	Itab uintptr
}
// go122InterfaceSwitchCache gates the Go 1.22 interface-switch cache.
const go122InterfaceSwitchCache = true

// UseInterfaceSwitchCache reports whether interface switches should use
// a cache on the given architecture.
func UseInterfaceSwitchCache(goarch string) bool {
	if !go122InterfaceSwitchCache {
		return false
	}
	// We need an atomic load instruction to make the cache multithreaded-safe.
	// (AtomicLoadPtr needs to be implemented in
	// cmd/compile/internal/ssa/_gen/ARCH.rules.)
	switch goarch {
	case "amd64", "arm64", "loong64", "mips", "mipsle",
		"mips64", "mips64le", "ppc64", "ppc64le", "riscv64", "s390x":
		return true
	}
	return false
}
// TypeAssert is the static descriptor for a compiled type assertion,
// pairing the asserted interface type with a runtime lookup cache.
type TypeAssert struct {
	Cache   *TypeAssertCache
	Inter   *InterfaceType
	CanFail bool // true for the comma-ok form, where failure yields ok=false
}

// TypeAssertCache caches type-assertion results, indexed by a masked
// hash of the source type. (Entries is declared with length 1;
// Mask+1 entries in practice, mirroring InterfaceSwitchCache.)
type TypeAssertCache struct {
	Mask    uintptr
	Entries [1]TypeAssertCacheEntry
}

// TypeAssertCacheEntry is one cached type-assertion result.
type TypeAssertCacheEntry struct {
	// type of source value (a *runtime._type)
	Typ uintptr
	// itab to use for result (a *runtime.itab)
	// nil if CanFail is set and conversion would fail.
	Itab uintptr
}

111
src/internal/abi/symtab.go Normal file
View File

@@ -0,0 +1,111 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package abi
// A FuncFlag records bits about a function, passed to the runtime.
type FuncFlag uint8

const (
	// FuncFlagTopFrame indicates a function that appears at the top of its stack.
	// The traceback routines stop at such a function and consider that a
	// successful, complete traversal of the stack.
	// Examples of TopFrame functions include goexit, which appears
	// at the top of a user goroutine stack, and mstart, which appears
	// at the top of a system goroutine stack.
	FuncFlagTopFrame FuncFlag = 1 << iota

	// FuncFlagSPWrite indicates a function that writes an arbitrary value to SP
	// (any write other than adding or subtracting a constant amount).
	// The traceback routines cannot encode such changes into the
	// pcsp tables, so the function traceback cannot safely unwind past
	// SPWrite functions. Stopping at an SPWrite function is considered
	// to be an incomplete unwinding of the stack. In certain contexts
	// (in particular garbage collector stack scans) that is a fatal error.
	FuncFlagSPWrite

	// FuncFlagAsm indicates that a function was implemented in assembly.
	FuncFlagAsm
)
// A FuncID identifies particular functions that need to be treated
// specially by the runtime.
// Note that in some situations involving plugins, there may be multiple
// copies of a particular special runtime function.
type FuncID uint8

const (
	// If you add a FuncID, you probably also want to add an entry to the map in
	// ../../cmd/internal/objabi/funcid.go.
	// The FuncID_xxx entries name the runtime function they identify.
	FuncIDNormal FuncID = iota // not a special function
	FuncID_abort
	FuncID_asmcgocall
	FuncID_asyncPreempt
	FuncID_cgocallback
	FuncID_corostart
	FuncID_debugCallV2
	FuncID_gcBgMarkWorker
	FuncID_goexit
	FuncID_gogo
	FuncID_gopanic
	FuncID_handleAsyncEvent
	FuncID_mcall
	FuncID_morestack
	FuncID_mstart
	FuncID_panicwrap
	FuncID_rt0_go
	FuncID_runfinq
	FuncID_runtime_main
	FuncID_sigpanic
	FuncID_systemstack
	FuncID_systemstack_switch
	FuncIDWrapper // any autogenerated code (hash/eq algorithms, method wrappers, etc.)
)
// ArgsSizeUnknown is set in Func.argsize to mark all functions
// whose argument size is unknown (C vararg functions, and
// assembly code without an explicit specification).
// This value is generated by the compiler, assembler, or linker.
const ArgsSizeUnknown = -0x80000000

// IDs for PCDATA and FUNCDATA tables in Go binaries.
//
// These must agree with ../../../runtime/funcdata.h.
const (
	PCDATA_UnsafePoint   = 0
	PCDATA_StackMapIndex = 1
	PCDATA_InlTreeIndex  = 2
	PCDATA_ArgLiveIndex  = 3

	FUNCDATA_ArgsPointerMaps    = 0
	FUNCDATA_LocalsPointerMaps  = 1
	FUNCDATA_StackObjects       = 2
	FUNCDATA_InlTree            = 3
	FUNCDATA_OpenCodedDeferInfo = 4
	FUNCDATA_ArgInfo            = 5
	FUNCDATA_ArgLiveInfo        = 6
	FUNCDATA_WrapInfo           = 7
)

// Special values for the PCDATA_UnsafePoint table.
const (
	UnsafePointSafe   = -1 // Safe for async preemption
	UnsafePointUnsafe = -2 // Unsafe for async preemption

	// UnsafePointRestart1(2) apply on a sequence of instructions, within
	// which if an async preemption happens, we should back off the PC
	// to the start of the sequence when resuming.
	// We need two so we can distinguish the start/end of the sequence
	// in case that two sequences are next to each other.
	UnsafePointRestart1 = -3
	UnsafePointRestart2 = -4

	// Like UnsafePointRestart1, but back to function entry if async preempted.
	UnsafePointRestartAtEntry = -5
)

const MINFUNC = 16 // minimum size for a function

// FuncTabBucketSize is the size of a bucket in the pc->func lookup table.
const FuncTabBucketSize = 256 * MINFUNC

22
src/internal/abi/testdata/x.go vendored Normal file
View File

@@ -0,0 +1,22 @@
// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package x
import "internal/abi"
func Fn0() // defined in assembly (see testdata/x.s), so it is ABI0
func Fn1() {}
var FnExpr func()
func test() {
	_ = abi.FuncPCABI0(Fn0)           // line 16, no error: Fn0 is ABI0
	_ = abi.FuncPCABIInternal(Fn0)    // line 17, error: ABI mismatch
	_ = abi.FuncPCABI0(Fn1)           // line 18, error: Fn1 is ABIInternal (Go)
	_ = abi.FuncPCABIInternal(Fn1)    // line 19, no error
	_ = abi.FuncPCABI0(FnExpr)        // line 20, error: not a direct reference
	_ = abi.FuncPCABIInternal(FnExpr) // line 21, no error: func value allowed
}

6
src/internal/abi/testdata/x.s vendored Normal file
View File

@@ -0,0 +1,6 @@
// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Fn0 is an empty function defined in assembly so that the compile-error
// test has a symbol with assembly (ABI0) linkage.
TEXT ·Fn0(SB), 0, $0-0
	RET

803
src/internal/abi/type.go Normal file
View File

@@ -0,0 +1,803 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package abi
import (
"unsafe"
)
// Type is the runtime representation of a Go type.
//
// Be careful about accessing this type at build time, as the version
// of this type in the compiler/linker may not have the same layout
// as the version in the target binary, due to pointer width
// differences and any experiments. Use cmd/compile/internal/rttype
// or the functions in compiletype.go to access this type instead.
// (TODO: this admonition applies to every type in this package.
// Put it in some shared location?)
type Type struct {
	Size_       uintptr
	PtrBytes    uintptr // number of (prefix) bytes in the type that can contain pointers
	Hash        uint32  // hash of type; avoids computation in hash tables
	TFlag       TFlag   // extra type information flags
	Align_      uint8   // alignment of variable with this type
	FieldAlign_ uint8   // alignment of struct field with this type
	// Kind_ is the enumeration for C. Its low 5 bits hold the Kind
	// (see KindMask); the upper bits carry flags such as
	// KindDirectIface and KindGCProg.
	Kind_ Kind
	// Equal is the function for comparing objects of this type
	// (ptr to object A, ptr to object B) -> ==?
	Equal func(unsafe.Pointer, unsafe.Pointer) bool
	// GCData stores the GC type data for the garbage collector.
	// If the KindGCProg bit is set in kind, GCData is a GC program.
	// Otherwise it is a ptrmask bitmap. See mbitmap.go for details.
	GCData    *byte
	Str       NameOff // string form
	PtrToThis TypeOff // type for pointer to this type, may be zero
}
// A Kind represents the specific kind of type that a Type represents.
// The zero Kind is not a valid kind.
type Kind uint8

const (
	Invalid Kind = iota
	Bool
	Int
	Int8
	Int16
	Int32
	Int64
	Uint
	Uint8
	Uint16
	Uint32
	Uint64
	Uintptr
	Float32
	Float64
	Complex64
	Complex128
	Array
	Chan
	Func
	Interface
	Map
	Pointer
	Slice
	String
	Struct
	UnsafePointer
)

// Flag bits stored in Type.Kind_ alongside the 5-bit Kind value.
const (
	// TODO (khr, drchase) why aren't these in TFlag? Investigate, fix if possible.
	KindDirectIface Kind = 1 << 5
	KindGCProg      Kind = 1 << 6 // Type.gc points to GC program
	KindMask        Kind = (1 << 5) - 1
)
// TFlag is used by a Type to signal what extra type information is
// available in the memory directly following the Type value.
type TFlag uint8

const (
	// TFlagUncommon means that there is extra data, an UncommonType,
	// just beyond the shared-per-type common data. That is, the data
	// for struct types will store their UncommonType at one offset, the
	// data for interface types will store their UncommonType at a different
	// offset. UncommonType is always accessed via a pointer that is computed
	// using trust-us-we-are-the-implementors pointer arithmetic.
	//
	// For example, if t.Kind() == Struct and t.tflag&TFlagUncommon != 0,
	// then t has UncommonType data and it can be accessed as:
	//
	//	type structTypeUncommon struct {
	//		structType
	//		u UncommonType
	//	}
	//	u := &(*structTypeUncommon)(unsafe.Pointer(t)).u
	TFlagUncommon TFlag = 1 << 0

	// TFlagExtraStar means the name in the str field has an
	// extraneous '*' prefix. This is because for most types T in
	// a program, the type *T also exists and reusing the str data
	// saves binary size.
	TFlagExtraStar TFlag = 1 << 1

	// TFlagNamed means the type has a name.
	TFlagNamed TFlag = 1 << 2

	// TFlagRegularMemory means that equal and hash functions can treat
	// this type as a single region of t.size bytes.
	TFlagRegularMemory TFlag = 1 << 3

	// TFlagUnrolledBitmap marks special types that are unrolled-bitmap
	// versions of types with GC programs.
	// These types need to be deallocated when the underlying object
	// is freed.
	TFlagUnrolledBitmap TFlag = 1 << 4
)

// NameOff is the offset to a name from moduledata.types. See resolveNameOff in runtime.
type NameOff int32

// TypeOff is the offset to a type from moduledata.types. See resolveTypeOff in runtime.
type TypeOff int32

// TextOff is an offset from the top of a text section. See (rtype).textOff in runtime.
type TextOff int32
// String returns the name of k.
func (k Kind) String() string {
	if idx := int(k); idx < len(kindNames) {
		return kindNames[idx]
	}
	// Out-of-range kinds report the name of the Invalid kind.
	return kindNames[0]
}
// kindNames maps a Kind value to its printed name, indexed by Kind.
var kindNames = []string{
	Invalid:       "invalid",
	Bool:          "bool",
	Int:           "int",
	Int8:          "int8",
	Int16:         "int16",
	Int32:         "int32",
	Int64:         "int64",
	Uint:          "uint",
	Uint8:         "uint8",
	Uint16:        "uint16",
	Uint32:        "uint32",
	Uint64:        "uint64",
	Uintptr:       "uintptr",
	Float32:       "float32",
	Float64:       "float64",
	Complex64:     "complex64",
	Complex128:    "complex128",
	Array:         "array",
	Chan:          "chan",
	Func:          "func",
	Interface:     "interface",
	Map:           "map",
	Pointer:       "ptr",
	Slice:         "slice",
	String:        "string",
	Struct:        "struct",
	UnsafePointer: "unsafe.Pointer",
}
// TypeOf returns the abi.Type of some value.
func TypeOf(a any) *Type {
	eface := *(*EmptyInterface)(unsafe.Pointer(&a))
	// Types are either static (for compiler-created types) or
	// heap-allocated but always reachable (for reflection-created
	// types, held in the central map). So there is no need to
	// escape types. NoEscape here helps avoid an unnecessary escape
	// of a.
	return (*Type)(NoEscape(unsafe.Pointer(eface.Type)))
}

// TypeFor returns the abi.Type for a type parameter.
func TypeFor[T any]() *Type {
	var v T
	if t := TypeOf(v); t != nil {
		return t // optimize for T being a non-interface kind
	}
	return TypeOf((*T)(nil)).Elem() // only for an interface kind
}
// Kind returns the Kind of t, masking off the flag bits stored in Kind_.
func (t *Type) Kind() Kind { return t.Kind_ & KindMask }

// HasName reports whether t has a name (TFlagNamed is set).
func (t *Type) HasName() bool {
	return t.TFlag&TFlagNamed != 0
}

// Pointers reports whether t contains pointers.
func (t *Type) Pointers() bool { return t.PtrBytes != 0 }

// IfaceIndir reports whether t is stored indirectly in an interface value.
func (t *Type) IfaceIndir() bool {
	return t.Kind_&KindDirectIface == 0
}

// IsDirectIface reports whether t is stored directly in an interface value.
func (t *Type) IsDirectIface() bool {
	return t.Kind_&KindDirectIface != 0
}

// GcSlice returns bytes [begin, end) of t's GC data. The slice is built
// over GCData with length end and then resliced from begin; bounds are
// not otherwise validated here.
func (t *Type) GcSlice(begin, end uintptr) []byte {
	return unsafe.Slice(t.GCData, int(end))[begin:]
}
// Method on non-interface type.
type Method struct {
	Name NameOff // name of method
	Mtyp TypeOff // method type (without receiver)
	Ifn  TextOff // fn used in interface call (one-word receiver)
	Tfn  TextOff // fn used for normal method call
}

// UncommonType is present only for defined types or types with methods
// (if T is a defined type, the uncommonTypes for T and *T have methods).
// Using a pointer to this struct reduces the overall size required
// to describe a non-defined type with no methods.
type UncommonType struct {
	PkgPath NameOff // import path; empty for built-in types like int, string
	Mcount  uint16  // number of methods
	Xcount  uint16  // number of exported methods
	Moff    uint32  // offset from this uncommontype to [mcount]Method
	_       uint32  // unused
}
// Methods returns t's method table, which is laid out in memory Moff
// bytes past t. The [1 << 16]Method array type is only a bound for the
// pointer cast: Mcount is a uint16, so at most 65535 entries exist.
func (t *UncommonType) Methods() []Method {
	if t.Mcount == 0 {
		return nil
	}
	return (*[1 << 16]Method)(addChecked(unsafe.Pointer(t), uintptr(t.Moff), "t.mcount > 0"))[:t.Mcount:t.Mcount]
}

// ExportedMethods returns the first Xcount entries of the method table
// (exported methods are presumably sorted first — confirm against the
// compiler's method-table layout).
func (t *UncommonType) ExportedMethods() []Method {
	if t.Xcount == 0 {
		return nil
	}
	return (*[1 << 16]Method)(addChecked(unsafe.Pointer(t), uintptr(t.Moff), "t.xcount > 0"))[:t.Xcount:t.Xcount]
}
// addChecked returns p+x.
//
// The whySafe string is ignored, so that the function still inlines
// as efficiently as p+x, but all call sites should use the string to
// record why the addition is safe, which is to say why the addition
// does not cause x to advance to the very end of p's allocation
// and therefore point incorrectly at the next block in memory.
func addChecked(p unsafe.Pointer, x uintptr, whySafe string) unsafe.Pointer {
return unsafe.Pointer(uintptr(p) + x)
}
// Imethod represents a method on an interface type.
type Imethod struct {
	Name NameOff // name of method
	Typ TypeOff // .(*FuncType) underneath
}

// ArrayType represents a fixed array type.
type ArrayType struct {
	Type
	Elem *Type // array element type
	Slice *Type // slice type
	Len uintptr // array length
}

// Len returns the length of t if t is an array type, otherwise 0.
func (t *Type) Len() int {
	if t.Kind() == Array {
		return int((*ArrayType)(unsafe.Pointer(t)).Len)
	}
	return 0
}

// Common returns the common portion of t: t itself, since every
// kind-specific descriptor embeds Type at its front.
func (t *Type) Common() *Type {
	return t
}

// ChanDir represents a channel type's direction.
type ChanDir int

const (
	RecvDir ChanDir = 1 << iota // <-chan
	SendDir // chan<-
	BothDir = RecvDir | SendDir // chan
	InvalidDir ChanDir = 0
)

// ChanType represents a channel type.
type ChanType struct {
	Type
	Elem *Type
	Dir ChanDir
}

// structTypeUncommon is the in-memory layout of a struct type that
// carries uncommon (method) data directly after its StructType.
type structTypeUncommon struct {
	StructType
	u UncommonType
}
// ChanDir returns the direction of t if t is a channel type, otherwise InvalidDir (0).
func (t *Type) ChanDir() ChanDir {
	if t.Kind() != Chan {
		return InvalidDir
	}
	// Safe to reinterpret: a Chan kind guarantees the descriptor is a ChanType.
	return (*ChanType)(unsafe.Pointer(t)).Dir
}
// Uncommon returns a pointer to T's "uncommon" data if there is any, otherwise nil.
func (t *Type) Uncommon() *UncommonType {
	if t.TFlag&TFlagUncommon == 0 {
		return nil
	}
	// When TFlagUncommon is set, the UncommonType is laid out in memory
	// immediately after the kind-specific descriptor, so overlay a struct
	// of the matching shape and take the address of its trailing field.
	switch t.Kind() {
	case Struct:
		return &(*structTypeUncommon)(unsafe.Pointer(t)).u
	case Pointer:
		type u struct {
			PtrType
			u UncommonType
		}
		return &(*u)(unsafe.Pointer(t)).u
	case Func:
		type u struct {
			FuncType
			u UncommonType
		}
		return &(*u)(unsafe.Pointer(t)).u
	case Slice:
		type u struct {
			SliceType
			u UncommonType
		}
		return &(*u)(unsafe.Pointer(t)).u
	case Array:
		type u struct {
			ArrayType
			u UncommonType
		}
		return &(*u)(unsafe.Pointer(t)).u
	case Chan:
		type u struct {
			ChanType
			u UncommonType
		}
		return &(*u)(unsafe.Pointer(t)).u
	case Map:
		type u struct {
			MapType
			u UncommonType
		}
		return &(*u)(unsafe.Pointer(t)).u
	case Interface:
		type u struct {
			InterfaceType
			u UncommonType
		}
		return &(*u)(unsafe.Pointer(t)).u
	default:
		// Kinds with no extra descriptor fields: the UncommonType
		// follows the plain Type directly.
		type u struct {
			Type
			u UncommonType
		}
		return &(*u)(unsafe.Pointer(t)).u
	}
}
// Elem returns the element type for t if t is an array, channel, map, pointer, or slice, otherwise nil.
func (t *Type) Elem() *Type {
	p := unsafe.Pointer(t)
	switch t.Kind() {
	case Array:
		return (*ArrayType)(p).Elem
	case Chan:
		return (*ChanType)(p).Elem
	case Map:
		return (*MapType)(p).Elem
	case Pointer:
		return (*PtrType)(p).Elem
	case Slice:
		return (*SliceType)(p).Elem
	default:
		return nil
	}
}
// StructType returns t cast to a *StructType, or nil if its tag does not match.
func (t *Type) StructType() *StructType {
	if t.Kind() == Struct {
		return (*StructType)(unsafe.Pointer(t))
	}
	return nil
}

// MapType returns t cast to a *MapType, or nil if its tag does not match.
func (t *Type) MapType() *MapType {
	if t.Kind() == Map {
		return (*MapType)(unsafe.Pointer(t))
	}
	return nil
}

// ArrayType returns t cast to a *ArrayType, or nil if its tag does not match.
func (t *Type) ArrayType() *ArrayType {
	if t.Kind() == Array {
		return (*ArrayType)(unsafe.Pointer(t))
	}
	return nil
}

// FuncType returns t cast to a *FuncType, or nil if its tag does not match.
func (t *Type) FuncType() *FuncType {
	if t.Kind() == Func {
		return (*FuncType)(unsafe.Pointer(t))
	}
	return nil
}

// InterfaceType returns t cast to a *InterfaceType, or nil if its tag does not match.
func (t *Type) InterfaceType() *InterfaceType {
	if t.Kind() == Interface {
		return (*InterfaceType)(unsafe.Pointer(t))
	}
	return nil
}
// Size returns the size of data with type t.
func (t *Type) Size() uintptr { return t.Size_ }

// Align returns the alignment of data with type t.
func (t *Type) Align() int { return int(t.Align_) }

// FieldAlign returns t's FieldAlign_ value — presumably the alignment
// of data with type t when used as a struct field; confirm against users.
func (t *Type) FieldAlign() int { return int(t.FieldAlign_) }

// InterfaceType represents an interface type.
type InterfaceType struct {
	Type
	PkgPath Name // import path
	Methods []Imethod // sorted by hash
}

// ExportedMethods returns t's exported methods, or nil if t has no
// uncommon (method) data at all.
func (t *Type) ExportedMethods() []Method {
	ut := t.Uncommon()
	if ut == nil {
		return nil
	}
	return ut.ExportedMethods()
}

// NumMethod returns the number of methods of t: for interface types,
// the number of interface methods; for all other types, the number of
// exported methods.
func (t *Type) NumMethod() int {
	if t.Kind() == Interface {
		tt := (*InterfaceType)(unsafe.Pointer(t))
		return tt.NumMethod()
	}
	return len(t.ExportedMethods())
}

// NumMethod returns the number of interface methods in the type's method set.
func (t *InterfaceType) NumMethod() int { return len(t.Methods) }
// MapType represents a map type.
type MapType struct {
	Type
	Key *Type // map key type
	Elem *Type // map element (value) type
	Bucket *Type // internal type representing a hash bucket
	// function for hashing keys (ptr to key, seed) -> hash
	Hasher func(unsafe.Pointer, uintptr) uintptr
	KeySize uint8 // size of key slot
	ValueSize uint8 // size of elem slot
	BucketSize uint16 // size of bucket
	Flags uint32 // bit flags; see the accessors below
}

// Note: flag values must match those used in the TMAP case
// in ../cmd/compile/internal/reflectdata/reflect.go:writeType.
func (mt *MapType) IndirectKey() bool { // store ptr to key instead of key itself
	return mt.Flags&1 != 0
}
func (mt *MapType) IndirectElem() bool { // store ptr to elem instead of elem itself
	return mt.Flags&2 != 0
}
func (mt *MapType) ReflexiveKey() bool { // true if k==k for all keys
	return mt.Flags&4 != 0
}
func (mt *MapType) NeedKeyUpdate() bool { // true if we need to update key on an overwrite
	return mt.Flags&8 != 0
}
func (mt *MapType) HashMightPanic() bool { // true if hash function might panic
	return mt.Flags&16 != 0
}

// Key returns the key type of t if t is a map type, otherwise nil.
func (t *Type) Key() *Type {
	if t.Kind() == Map {
		return (*MapType)(unsafe.Pointer(t)).Key
	}
	return nil
}

// SliceType represents a slice type.
type SliceType struct {
	Type
	Elem *Type // slice element type
}
// FuncType represents a function type.
//
// A *Type for each in and out parameter is stored in an array that
// directly follows the FuncType (and possibly its UncommonType). So
// a function type with one method, one input, and one output is:
//
//	struct {
//		FuncType
//		UncommonType
//		[2]*Type // [0] is in, [1] is out
//	}
type FuncType struct {
	Type
	InCount uint16
	OutCount uint16 // top bit is set if last input parameter is ...
}

// In returns the type of the i'th input parameter of function type t.
func (t *FuncType) In(i int) *Type {
	return t.InSlice()[i]
}

// NumIn returns the number of input parameters of t.
func (t *FuncType) NumIn() int {
	return int(t.InCount)
}

// NumOut returns the number of output (result) parameters of t.
// The top bit of OutCount is the variadic flag and is masked off.
func (t *FuncType) NumOut() int {
	return int(t.OutCount & (1<<15 - 1))
}
// Out returns the type of the i'th output (result) parameter of function type t.
func (t *FuncType) Out(i int) *Type {
	return t.OutSlice()[i]
}
// InSlice returns the slice of input parameter types of t.
// The parameter array is laid out in memory directly after the FuncType
// (and after the UncommonType, when TFlagUncommon is set).
func (t *FuncType) InSlice() []*Type {
	uadd := unsafe.Sizeof(*t)
	if t.TFlag&TFlagUncommon != 0 {
		uadd += unsafe.Sizeof(UncommonType{})
	}
	if t.InCount == 0 {
		return nil
	}
	// The [1 << 16] array type is a fiction used to build the slice
	// header; InCount is a uint16, so it cannot exceed that bound.
	return (*[1 << 16]*Type)(addChecked(unsafe.Pointer(t), uadd, "t.inCount > 0"))[:t.InCount:t.InCount]
}

// OutSlice returns the slice of output (result) types of t, which
// follow the input types in the same trailing array.
func (t *FuncType) OutSlice() []*Type {
	outCount := uint16(t.NumOut())
	if outCount == 0 {
		return nil
	}
	uadd := unsafe.Sizeof(*t)
	if t.TFlag&TFlagUncommon != 0 {
		uadd += unsafe.Sizeof(UncommonType{})
	}
	// [1 << 17] leaves room for both the inputs and the outputs; the
	// results occupy entries [InCount, InCount+outCount).
	return (*[1 << 17]*Type)(addChecked(unsafe.Pointer(t), uadd, "outCount > 0"))[t.InCount : t.InCount+outCount : t.InCount+outCount]
}

// IsVariadic reports whether the last input parameter of t is a ...
// parameter (stored in the top bit of OutCount).
func (t *FuncType) IsVariadic() bool {
	return t.OutCount&(1<<15) != 0
}
// PtrType represents a pointer type.
type PtrType struct {
	Type
	Elem *Type // pointer element (pointed at) type
}

// StructField describes a single field of a struct type.
type StructField struct {
	Name Name // name is always non-empty
	Typ *Type // type of field
	Offset uintptr // byte offset of field
}

// Embedded reports whether f is an embedded (anonymous) field.
func (f *StructField) Embedded() bool {
	return f.Name.IsEmbedded()
}

// StructType represents a struct type.
type StructType struct {
	Type
	PkgPath Name
	Fields []StructField
}
// Name is an encoded type Name with optional extra data.
//
// The first byte is a bit field containing:
//
//	1<<0 the name is exported
//	1<<1 tag data follows the name
//	1<<2 pkgPath nameOff follows the name and tag
//	1<<3 the name is of an embedded (a.k.a. anonymous) field
//
// Following that, there is a varint-encoded length of the name,
// followed by the name itself.
//
// If tag data is present, it also has a varint-encoded length
// followed by the tag itself.
//
// If the import path follows, then 4 bytes at the end of
// the data form a nameOff. The import path is only set for concrete
// methods that are defined in a different package than their type.
//
// If a name starts with "*", then the exported bit represents
// whether the pointed to type is exported.
//
// Note: this encoding must match here and in:
//   cmd/compile/internal/reflectdata/reflect.go
//   cmd/link/internal/ld/decodesym.go
type Name struct {
	Bytes *byte // pointer to the first byte of the encoding described above
}
// DataChecked does pointer arithmetic on n's Bytes, and that arithmetic is asserted to
// be safe for the reason in whySafe (which can appear in a backtrace, etc.)
func (n Name) DataChecked(off int, whySafe string) *byte {
	return (*byte)(addChecked(unsafe.Pointer(n.Bytes), uintptr(off), whySafe))
}

// Data does pointer arithmetic on n's Bytes, and that arithmetic is asserted to
// be safe because the runtime made the call (other packages use DataChecked).
func (n Name) Data(off int) *byte {
	return (*byte)(addChecked(unsafe.Pointer(n.Bytes), uintptr(off), "the runtime doesn't need to give you a reason"))
}

// IsExported reports whether the exported bit (1<<0) of the flags byte is set.
func (n Name) IsExported() bool {
	return (*n.Bytes)&(1<<0) != 0
}

// HasTag reports whether tag data follows the name (flag bit 1<<1).
func (n Name) HasTag() bool {
	return (*n.Bytes)&(1<<1) != 0
}

// IsEmbedded reports whether n names an embedded (anonymous) field (flag bit 1<<3).
func (n Name) IsEmbedded() bool {
	return (*n.Bytes)&(1<<3) != 0
}
// ReadVarint parses a varint as encoded by encoding/binary, starting at
// byte offset off within n's data.
// It returns the number of encoded bytes and the encoded value.
func (n Name) ReadVarint(off int) (int, int) {
	v := 0
	for i := 0; ; i++ {
		x := *n.DataChecked(off+i, "read varint")
		// Low 7 bits are data; the high bit marks a continuation byte.
		v += int(x&0x7f) << (7 * i)
		if x&0x80 == 0 {
			return i + 1, v
		}
	}
}

// IsBlank indicates whether n is "_".
func (n Name) IsBlank() bool {
	if n.Bytes == nil {
		return false
	}
	// Offset 1 skips the flags byte; a blank name is length 1 and '_'.
	_, l := n.ReadVarint(1)
	return l == 1 && *n.Data(2) == '_'
}
// writeVarint writes n to buf in varint form. Returns the
// number of bytes written. n must be nonnegative.
// Writes at most 10 bytes.
func writeVarint(buf []byte, n int) int {
	i := 0
	for n >= 0x80 {
		// More significant bits remain: emit the low 7 bits
		// with the continuation bit set.
		buf[i] = byte(n) | 0x80
		n >>= 7
		i++
	}
	// Final byte carries the remaining value with the high bit clear.
	buf[i] = byte(n)
	return i + 1
}
// Name returns the name string for n, or empty if there is none.
// (The data read here is the name, not the tag — see Tag below.)
func (n Name) Name() string {
	if n.Bytes == nil {
		return ""
	}
	i, l := n.ReadVarint(1)
	return unsafe.String(n.DataChecked(1+i, "non-empty string"), l)
}

// Tag returns the tag string for n, or empty if there is none.
func (n Name) Tag() string {
	if !n.HasTag() {
		return ""
	}
	// Skip the name (its varint length header plus data) to reach the tag.
	i, l := n.ReadVarint(1)
	i2, l2 := n.ReadVarint(1 + i + l)
	return unsafe.String(n.DataChecked(1+i+l+i2, "non-empty string"), l2)
}
// NewName allocates and returns the encoded form of a Name carrying the
// given name and tag, with the exported and embedded flag bits set as
// requested. NewName never sets the pkgPath bit (1<<2) and never appends
// a trailing nameOff.
func NewName(n, tag string, exported, embedded bool) Name {
	if len(n) >= 1<<29 {
		panic("abi.NewName: name too long: " + n[:1024] + "...")
	}
	if len(tag) >= 1<<29 {
		panic("abi.NewName: tag too long: " + tag[:1024] + "...")
	}
	var nameLen [10]byte
	var tagLen [10]byte
	nameLenLen := writeVarint(nameLen[:], len(n))
	tagLenLen := writeVarint(tagLen[:], len(tag))
	var bits byte
	// l accumulates the total encoded length: flags byte, name header+data,
	// and (if present) tag header+data.
	l := 1 + nameLenLen + len(n)
	if exported {
		bits |= 1 << 0
	}
	if len(tag) > 0 {
		l += tagLenLen + len(tag)
		bits |= 1 << 1
	}
	if embedded {
		bits |= 1 << 3
	}
	b := make([]byte, l)
	b[0] = bits
	copy(b[1:], nameLen[:nameLenLen])
	copy(b[1+nameLenLen:], n)
	if len(tag) > 0 {
		tb := b[1+nameLenLen+len(n):]
		copy(tb, tagLen[:tagLenLen])
		copy(tb[tagLenLen:], tag)
	}
	return Name{Bytes: &b[0]}
}
// Limits on the argument metadata recorded for printing function
// arguments in tracebacks.
const (
	TraceArgsLimit = 10 // print no more than 10 args/components
	TraceArgsMaxDepth = 5 // no more than 5 layers of nesting
	// maxLen is a (conservative) upper bound of the byte stream length. For
	// each arg/component, it has no more than 2 bytes of data (size, offset),
	// and no more than one {, }, ... at each level (it cannot have both the
	// data and ... unless it is the last one, just be conservative). Plus 1
	// for _endSeq.
	TraceArgsMaxLen = (TraceArgsMaxDepth*3+2)*TraceArgsLimit + 1
)

// Populate the data.
// The data is a stream of bytes, which contains the offsets and sizes of the
// non-aggregate arguments or non-aggregate fields/elements of aggregate-typed
// arguments, along with special "operators". Specifically,
//   - for each non-aggregate arg/field/element, its offset from FP (1 byte) and
//     size (1 byte)
//   - special operators:
//   - 0xff - end of sequence
//   - 0xfe - print { (at the start of an aggregate-typed argument)
//   - 0xfd - print } (at the end of an aggregate-typed argument)
//   - 0xfc - print ... (more args/fields/elements)
//   - 0xfb - print _ (offset too large)
const (
	TraceArgsEndSeq = 0xff
	TraceArgsStartAgg = 0xfe
	TraceArgsEndAgg = 0xfd
	TraceArgsDotdotdot = 0xfc
	TraceArgsOffsetTooLarge = 0xfb
	TraceArgsSpecial = 0xf0 // above this are operators, below this are ordinary offsets
)

// MaxPtrmaskBytes is the maximum length of a GC ptrmask bitmap,
// which holds 1-bit entries describing where pointers are in a given type.
// Above this length, the GC information is recorded as a GC program,
// which can express repetition compactly. In either form, the
// information is used by the runtime to initialize the heap bitmap,
// and for large types (like 128 or more words), they are roughly the
// same speed. GC programs are never much larger and often more
// compact. (If large arrays are involved, they can be arbitrarily
// more compact.)
//
// The cutoff must be large enough that any allocation large enough to
// use a GC program is large enough that it does not share heap bitmap
// bytes with any other objects, allowing the GC program execution to
// assume an aligned start and not use atomic operations. In the current
// runtime, this means all malloc size classes larger than the cutoff must
// be multiples of four words. On 32-bit systems that's 16 bytes, and
// all size classes >= 16 bytes are 16-byte aligned, so no real constraint.
// On 64-bit systems, that's 32 bytes, and 32-byte alignment is guaranteed
// for size classes >= 256 bytes. On a 64-bit system, 256 bytes allocated
// is 32 pointers, the bits for which fit in 4 bytes. So MaxPtrmaskBytes
// must be >= 4.
//
// We used to use 16 because the GC programs do have some constant overhead
// to get started, and processing 128 pointers seems to be enough to
// amortize that overhead well.
//
// To make sure that the runtime's chansend can call typeBitsBulkBarrier,
// we raised the limit to 2048, so that even 32-bit systems are guaranteed to
// use bitmaps for objects up to 64 kB in size.
const MaxPtrmaskBytes = 2048

19
src/internal/asan/asan.go Normal file
View File

@@ -0,0 +1,19 @@
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build asan
package asan
import (
"unsafe"
)
const Enabled = true
//go:linkname Read runtime.asanread
func Read(addr unsafe.Pointer, len uintptr)
//go:linkname Write runtime.asanwrite
func Write(addr unsafe.Pointer, len uintptr)

10
src/internal/asan/doc.go Normal file
View File

@@ -0,0 +1,10 @@
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package asan contains helper functions for manually instrumenting
// code for the address sanitizer.
// The runtime package intentionally exports these functions only in the
// asan build; this package exports them unconditionally but without the
// "asan" build tag they are no-ops.
package asan

View File

@@ -0,0 +1,17 @@
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build !asan
package asan
import (
"unsafe"
)
const Enabled = false
func Read(addr unsafe.Pointer, len uintptr) {}
func Write(addr unsafe.Pointer, len uintptr) {}

View File

@@ -0,0 +1,778 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package bisect can be used by compilers and other programs
// to serve as a target for the bisect debugging tool.
// See [golang.org/x/tools/cmd/bisect] for details about using the tool.
//
// To be a bisect target, allowing bisect to help determine which of a set of independent
// changes provokes a failure, a program needs to:
//
// 1. Define a way to accept a change pattern on its command line or in its environment.
// The most common mechanism is a command-line flag.
// The pattern can be passed to [New] to create a [Matcher], the compiled form of a pattern.
//
// 2. Assign each change a unique ID. One possibility is to use a sequence number,
// but the most common mechanism is to hash some kind of identifying information
// like the file and line number where the change might be applied.
// [Hash] hashes its arguments to compute an ID.
//
// 3. Enable each change that the pattern says should be enabled.
// The [Matcher.ShouldEnable] method answers this question for a given change ID.
//
// 4. Print a report identifying each change that the pattern says should be printed.
// The [Matcher.ShouldPrint] method answers this question for a given change ID.
// The report consists of one or more lines on standard error or standard output
// that contain a “match marker”. [Marker] returns the match marker for a given ID.
// When bisect reports a change as causing the failure, it identifies the change
// by printing the report lines with the match marker removed.
//
// # Example Usage
//
// A program starts by defining how it receives the pattern. In this example, we will assume a flag.
// The next step is to compile the pattern:
//
// m, err := bisect.New(patternFlag)
// if err != nil {
// log.Fatal(err)
// }
//
// Then, each time a potential change is considered, the program computes
// a change ID by hashing identifying information (source file and line, in this case)
// and then calls m.ShouldPrint and m.ShouldEnable to decide whether to
// print and enable the change, respectively. The two can return different values
// depending on whether bisect is trying to find a minimal set of changes to
// disable or to enable to provoke the failure.
//
// It is usually helpful to write a helper function that accepts the identifying information
// and then takes care of hashing, printing, and reporting whether the identified change
// should be enabled. For example, a helper for changes identified by a file and line number
// would be:
//
// func ShouldEnable(file string, line int) {
// h := bisect.Hash(file, line)
// if m.ShouldPrint(h) {
// fmt.Fprintf(os.Stderr, "%v %s:%d\n", bisect.Marker(h), file, line)
// }
// return m.ShouldEnable(h)
// }
//
// Finally, note that New returns a nil Matcher when there is no pattern,
// meaning that the target is not running under bisect at all,
// so all changes should be enabled and none should be printed.
// In that common case, the computation of the hash can be avoided entirely
// by checking for m == nil first:
//
// func ShouldEnable(file string, line int) bool {
// if m == nil {
// return true
// }
// h := bisect.Hash(file, line)
// if m.ShouldPrint(h) {
// fmt.Fprintf(os.Stderr, "%v %s:%d\n", bisect.Marker(h), file, line)
// }
// return m.ShouldEnable(h)
// }
//
// When the identifying information is expensive to format, this code can call
// [Matcher.MarkerOnly] to find out whether short report lines containing only the
// marker are permitted for a given run. (Bisect permits such lines when it is
// still exploring the space of possible changes and will not be showing the
// output to the user.) If so, the client can choose to print only the marker:
//
// func ShouldEnable(file string, line int) bool {
// if m == nil {
// return true
// }
// h := bisect.Hash(file, line)
// if m.ShouldPrint(h) {
// if m.MarkerOnly() {
// bisect.PrintMarker(os.Stderr, h)
// } else {
// fmt.Fprintf(os.Stderr, "%v %s:%d\n", bisect.Marker(h), file, line)
// }
// }
// return m.ShouldEnable(h)
// }
//
// This specific helper deciding whether to enable a change identified by
// file and line number and printing about the change when necessary is
// provided by the [Matcher.FileLine] method.
//
// Another common usage is deciding whether to make a change in a function
// based on the caller's stack, to identify the specific calling contexts that the
// change breaks. The [Matcher.Stack] method takes care of obtaining the stack,
// printing it when necessary, and reporting whether to enable the change
// based on that stack.
//
// # Pattern Syntax
//
// Patterns are generated by the bisect tool and interpreted by [New].
// Users should not have to understand the patterns except when
// debugging a target's bisect support or debugging the bisect tool itself.
//
// The pattern syntax selecting a change is a sequence of bit strings
// separated by + and - operators. Each bit string denotes the set of
// changes with IDs ending in those bits, + is set addition, - is set subtraction,
// and the expression is evaluated in the usual left-to-right order.
// The special binary number “y” denotes the set of all changes,
// standing in for the empty bit string.
// In the expression, all the + operators must appear before all the - operators.
// A leading + adds to an empty set. A leading - subtracts from the set of all
// possible suffixes.
//
// For example:
//
// - “01+10” and “+01+10” both denote the set of changes
// with IDs ending with the bits 01 or 10.
//
// - “01+10-1001” denotes the set of changes with IDs
// ending with the bits 01 or 10, but excluding those ending in 1001.
//
//   - “-01-1000” and “y-01-1000” both denote the set of all changes
// with IDs not ending in 01 nor 1000.
//
// - “0+1-01+001” is not a valid pattern, because all the + operators do not
// appear before all the - operators.
//
// In the syntaxes described so far, the pattern specifies the changes to
// enable and report. If a pattern is prefixed by a “!”, the meaning
// changes: the pattern specifies the changes to DISABLE and report. This
// mode of operation is needed when a program passes with all changes
// enabled but fails with no changes enabled. In this case, bisect
// searches for minimal sets of changes to disable.
// Put another way, the leading “!” inverts the result from [Matcher.ShouldEnable]
// but does not invert the result from [Matcher.ShouldPrint].
//
// As a convenience for manual debugging, “n” is an alias for “!y”,
// meaning to disable and report all changes.
//
// Finally, a leading “v” in the pattern indicates that the reports will be shown
// to the user of bisect to describe the changes involved in a failure.
// At the API level, the leading “v” causes [Matcher.Visible] to return true.
// See the next section for details.
//
// # Match Reports
//
// The target program must enable only those changes matched
// by the pattern, and it must print a match report for each such change.
// A match report consists of one or more lines of text that will be
// printed by the bisect tool to describe a change implicated in causing
// a failure. Each line in the report for a given change must contain a
// match marker with that change ID, as returned by [Marker].
// The markers are elided when displaying the lines to the user.
//
// A match marker has the form “[bisect-match 0x1234]” where
// 0x1234 is the change ID in hexadecimal.
// An alternate form is “[bisect-match 010101]”, giving the change ID in binary.
//
// When [Matcher.Visible] returns false, the match reports are only
// being processed by bisect to learn the set of enabled changes,
// not shown to the user, meaning that each report can be a match
// marker on a line by itself, eliding the usual textual description.
// When the textual description is expensive to compute,
// checking [Matcher.Visible] can help avoid that expense
// in most runs.
package bisect
import (
"runtime"
"sync"
"sync/atomic"
)
// New creates and returns a new Matcher implementing the given pattern.
// The pattern syntax is defined in the package doc comment.
//
// In addition to the pattern syntax, New("") returns nil, nil.
// The nil *Matcher is valid for use: it returns true from ShouldEnable
// and false from ShouldPrint for all changes. Callers can avoid calling
// [Hash], [Matcher.ShouldEnable], and [Matcher.ShouldPrint] entirely
// when they recognize the nil Matcher.
func New(pattern string) (*Matcher, error) {
	if pattern == "" {
		return nil, nil
	}
	m := new(Matcher)
	p := pattern
	// Special case for leading 'q' so that 'qn' quietly disables, e.g. fmahash=qn to disable fma
	// Any instance of 'v' disables 'q'.
	if len(p) > 0 && p[0] == 'q' {
		m.quiet = true
		p = p[1:]
		if p == "" {
			return nil, &parseError{"invalid pattern syntax: " + pattern}
		}
	}
	// Allow multiple v, so that “bisect cmd vPATTERN” can force verbose all the time.
	for len(p) > 0 && p[0] == 'v' {
		m.verbose = true
		m.quiet = false
		p = p[1:]
		if p == "" {
			return nil, &parseError{"invalid pattern syntax: " + pattern}
		}
	}
	// Allow multiple !, each negating the last, so that “bisect cmd !PATTERN” works
	// even when bisect chooses to add its own !.
	m.enable = true
	for len(p) > 0 && p[0] == '!' {
		m.enable = !m.enable
		p = p[1:]
		if p == "" {
			return nil, &parseError{"invalid pattern syntax: " + pattern}
		}
	}
	if p == "n" {
		// n is an alias for !y.
		m.enable = !m.enable
		p = "y"
	}
	// Parse actual pattern syntax.
	result := true
	bits := uint64(0)
	start := 0
	wid := 1 // 1-bit (binary); sometimes 4-bit (hex)
	for i := 0; i <= len(p); i++ {
		// Imagine a trailing - at the end of the pattern to flush final suffix
		c := byte('-')
		if i < len(p) {
			c = p[i]
		}
		if i == start && wid == 1 && c == 'x' { // leading x for hex
			start = i + 1
			wid = 4
			continue
		}
		switch c {
		default:
			return nil, &parseError{"invalid pattern syntax: " + pattern}
		case '2', '3', '4', '5', '6', '7', '8', '9':
			// Digits above 1 are only valid in hex mode.
			if wid != 4 {
				return nil, &parseError{"invalid pattern syntax: " + pattern}
			}
			fallthrough
		case '0', '1':
			bits <<= wid
			bits |= uint64(c - '0')
		case 'a', 'b', 'c', 'd', 'e', 'f', 'A', 'B', 'C', 'D', 'E', 'F':
			if wid != 4 {
				return nil, &parseError{"invalid pattern syntax: " + pattern}
			}
			bits <<= 4
			bits |= uint64(c&^0x20 - 'A' + 10)
		case 'y':
			// y stands alone for "all changes"; it cannot be followed
			// by binary digits.
			if i+1 < len(p) && (p[i+1] == '0' || p[i+1] == '1') {
				return nil, &parseError{"invalid pattern syntax: " + pattern}
			}
			bits = 0
		case '+', '-':
			if c == '+' && result == false {
				// Have already seen a -. Should be - from here on.
				return nil, &parseError{"invalid pattern syntax (+ after -): " + pattern}
			}
			if i > 0 {
				// Flush the suffix accumulated since the last operator.
				n := (i - start) * wid
				if n > 64 {
					return nil, &parseError{"pattern bits too long: " + pattern}
				}
				if n <= 0 {
					return nil, &parseError{"invalid pattern syntax: " + pattern}
				}
				if p[start] == 'y' {
					n = 0
				}
				mask := uint64(1)<<n - 1
				m.list = append(m.list, cond{mask, bits, result})
			} else if c == '-' {
				// leading - subtracts from complete set
				m.list = append(m.list, cond{0, 0, true})
			}
			bits = 0
			result = c == '+'
			start = i + 1
			wid = 1
		}
	}
	return m, nil
}
// A Matcher is the parsed, compiled form of a PATTERN string.
// The nil *Matcher is valid: it has all changes enabled but none reported.
type Matcher struct {
	verbose bool // annotate reporting with human-helpful information
	quiet bool // disables all reporting. reset if verbose is true. use case is -d=fmahash=qn
	enable bool // when true, list is for “enable and report” (when false, “disable and report”)
	list []cond // conditions; later ones win over earlier ones
	dedup atomic.Pointer[dedup] // lazily-created set used to dedup stack reports
}

// A cond is a single condition in the matcher.
// Given an input id, if id&mask == bits, return the result.
type cond struct {
	mask uint64
	bits uint64
	result bool
}

// MarkerOnly reports whether it is okay to print only the marker for
// a given change, omitting the identifying information.
// MarkerOnly returns true when bisect is using the printed reports
// only for an intermediate search step, not for showing to users.
func (m *Matcher) MarkerOnly() bool {
	return !m.verbose
}

// ShouldEnable reports whether the change with the given id should be enabled.
// A nil Matcher enables every change.
func (m *Matcher) ShouldEnable(id uint64) bool {
	if m == nil {
		return true
	}
	return m.matchResult(id) == m.enable
}

// ShouldPrint reports whether to print identifying information about the change with the given id.
// A nil or quiet Matcher prints nothing.
func (m *Matcher) ShouldPrint(id uint64) bool {
	if m == nil || m.quiet {
		return false
	}
	return m.matchResult(id)
}
// matchResult returns the result of the condition that decides id,
// where later conditions in the list take precedence over earlier ones.
// If no condition matches, matchResult returns false.
func (m *Matcher) matchResult(id uint64) bool {
	matched := false
	for _, c := range m.list {
		if id&c.mask == c.bits {
			matched = c.result
		}
	}
	return matched
}
// FileLine reports whether the change identified by file and line should be enabled.
// If the change should be printed, FileLine prints a one-line report to w.
// A nil Matcher enables every change and prints nothing.
func (m *Matcher) FileLine(w Writer, file string, line int) bool {
	if m == nil {
		return true
	}
	return m.fileLine(w, file, line)
}

// fileLine does the real work for FileLine.
// This lets FileLine's body handle m == nil and potentially be inlined.
func (m *Matcher) fileLine(w Writer, file string, line int) bool {
	h := Hash(file, line)
	if m.ShouldPrint(h) {
		if m.MarkerOnly() {
			PrintMarker(w, h)
		} else {
			printFileLine(w, h, file, line)
		}
	}
	return m.ShouldEnable(h)
}

// printFileLine prints a non-marker-only report for file:line to w.
func printFileLine(w Writer, h uint64, file string, line int) error {
	const markerLen = 40 // overestimate of the marker's printed length
	b := make([]byte, 0, markerLen+len(file)+24)
	b = AppendMarker(b, h)
	b = appendFileLine(b, file, line)
	b = append(b, '\n')
	_, err := w.Write(b)
	return err
}
// appendFileLine appends file:line to dst, returning the extended slice.
// The line number is formatted by hand to avoid importing strconv.
func appendFileLine(dst []byte, file string, line int) []byte {
	dst = append(dst, file...)
	dst = append(dst, ':')
	u := uint(line)
	if line < 0 {
		dst = append(dst, '-')
		u = -u
	}
	// Fill a scratch buffer from the right with decimal digits;
	// the do-while shape emits "0" for line == 0.
	var digits [24]byte
	pos := len(digits)
	for {
		pos--
		digits[pos] = '0' + byte(u%10)
		u /= 10
		if u == 0 {
			break
		}
	}
	return append(dst, digits[pos:]...)
}
// Stack assigns the current call stack a change ID.
// If the stack should be printed, Stack prints it.
// Then Stack reports whether a change at the current call stack should be enabled.
// A nil Matcher enables every change and prints nothing.
func (m *Matcher) Stack(w Writer) bool {
	if m == nil {
		return true
	}
	return m.stack(w)
}
// stack does the real work for Stack.
// This lets Stack's body handle m == nil and potentially be inlined.
func (m *Matcher) stack(w Writer) bool {
	const maxStack = 16
	var stk [maxStack]uintptr
	n := runtime.Callers(2, stk[:])
	// caller #2 is not for printing; need it to normalize PCs if ASLR.
	if n <= 1 {
		return false
	}
	base := stk[0]
	// normalize PCs
	for i := range stk[:n] {
		stk[i] -= base
	}
	h := Hash(stk[:n])
	if m.ShouldPrint(h) {
		// Lazily create the shared dedup set; CompareAndSwap picks a
		// single winner if several goroutines race to install it.
		var d *dedup
		for {
			d = m.dedup.Load()
			if d != nil {
				break
			}
			d = new(dedup)
			if m.dedup.CompareAndSwap(nil, d) {
				break
			}
		}
		if m.MarkerOnly() {
			if !d.seenLossy(h) {
				PrintMarker(w, h)
			}
		} else {
			if !d.seen(h) {
				// Restore PCs in stack for printing
				for i := range stk[:n] {
					stk[i] += base
				}
				printStack(w, h, stk[1:n])
			}
		}
	}
	return m.ShouldEnable(h)
}
// Writer is the same interface as io.Writer.
// It is duplicated here to avoid importing io.
type Writer interface {
	Write([]byte) (int, error)
}

// PrintMarker prints to w a one-line report containing only the marker for h.
// It is appropriate to use when [Matcher.ShouldPrint] and [Matcher.MarkerOnly] both return true.
func PrintMarker(w Writer, h uint64) error {
	var buf [50]byte // comfortably fits the 33-byte marker plus newline
	b := AppendMarker(buf[:0], h)
	b = append(b, '\n')
	_, err := w.Write(b)
	return err
}
// printStack prints to w a multi-line report containing a formatting of the call stack stk,
// with each line preceded by the marker for h.
// Each frame produces two lines (function, then file:line), and the
// report ends with a marker-only line.
func printStack(w Writer, h uint64, stk []uintptr) error {
	buf := make([]byte, 0, 2048)
	var prefixBuf [100]byte
	prefix := AppendMarker(prefixBuf[:0], h)
	frames := runtime.CallersFrames(stk)
	for {
		f, more := frames.Next()
		buf = append(buf, prefix...)
		buf = append(buf, f.Function...)
		buf = append(buf, "()\n"...)
		buf = append(buf, prefix...)
		buf = append(buf, '\t')
		buf = appendFileLine(buf, f.File, f.Line)
		buf = append(buf, '\n')
		if !more {
			break
		}
	}
	// Trailing marker-only line terminates the report.
	buf = append(buf, prefix...)
	buf = append(buf, '\n')
	_, err := w.Write(buf)
	return err
}
// Marker returns the match marker text to use on any line reporting details
// about a match of the given ID.
// It always returns the hexadecimal format.
func Marker(id uint64) string {
	b := AppendMarker(nil, id)
	return string(b)
}
// AppendMarker is like [Marker] but appends the marker to dst.
// The marker is always "[bisect-match 0x" followed by exactly
// 16 lowercase hex digits and a closing "]".
func AppendMarker(dst []byte, id uint64) []byte {
	const prefix = "[bisect-match 0x"
	dst = append(dst, prefix...)
	// Emit the 16 nibbles most-significant first.
	for shift := 60; shift >= 0; shift -= 4 {
		dst = append(dst, "0123456789abcdef"[(id>>uint(shift))&0xf])
	}
	return append(dst, ']')
}
// CutMarker finds the first match marker in line and removes it,
// returning the shortened line (with the marker removed),
// the ID from the match marker,
// and whether a marker was found at all.
// If there is no marker, CutMarker returns line, 0, false.
func CutMarker(line string) (short string, id uint64, ok bool) {
	const prefix = "[bisect-match "

	// Locate the first occurrence of prefix, scanning byte by byte.
	start := 0
	for {
		if start >= len(line)-len(prefix) {
			return line, 0, false
		}
		if line[start] == '[' && line[start:start+len(prefix)] == prefix {
			break
		}
		start++
	}

	// Find the closing bracket.
	end := start + len(prefix)
	for end < len(line) && line[end] != ']' {
		end++
	}
	if end >= len(line) {
		return line, 0, false
	}

	idstr := line[start+len(prefix) : end]
	if len(idstr) >= 3 && idstr[:2] == "0x" {
		// Hexadecimal form: "0x" plus at most 16 digits.
		// As in the original, unexpected characters contribute zero bits.
		if len(idstr) > 2+16 {
			return line, 0, false
		}
		for _, c := range []byte(idstr[2:]) {
			id <<= 4
			switch {
			case '0' <= c && c <= '9':
				id |= uint64(c - '0')
			case 'a' <= c && c <= 'f':
				id |= uint64(c - 'a' + 10)
			case 'A' <= c && c <= 'F':
				id |= uint64(c - 'A' + 10)
			}
		}
	} else {
		// Binary form: 1 to 64 digits, each '0' or '1'.
		if idstr == "" || len(idstr) > 64 {
			return line, 0, false
		}
		for _, c := range []byte(idstr) {
			if c != '0' && c != '1' {
				return line, 0, false
			}
			id = id<<1 | uint64(c-'0')
		}
	}

	// Construct shortened line.
	// Remove at most one space from around the marker,
	// so that "foo [marker] bar" shortens to "foo bar".
	end++ // skip ]
	if start > 0 && line[start-1] == ' ' {
		start--
	} else if end < len(line) && line[end] == ' ' {
		end++
	}
	return line[:start] + line[end:], id, true
}
// Hash computes a hash of the data arguments,
// each of which must be of type string, byte, int, uint, int32, uint32, int64, uint64, uintptr, or a slice of one of those types.
// The hash is FNV-1a over the bytes of the values; slice arguments are
// hashed element-wise with no length prefix or separator.
func Hash(data ...any) uint64 {
	h := offset64
	for _, v := range data {
		switch v := v.(type) {
		default:
			// Note: Not printing the type, because reflect.ValueOf(v)
			// would make the interfaces prepared by the caller escape
			// and therefore allocate. This way, Hash(file, line) runs
			// without any allocation. It should be clear from the
			// source code calling Hash what the bad argument was.
			panic("bisect.Hash: unexpected argument type")
		case string:
			h = fnvString(h, v)
		case byte:
			h = fnv(h, v)
		case int:
			h = fnvUint64(h, uint64(v))
		case uint:
			h = fnvUint64(h, uint64(v))
		case int32:
			h = fnvUint32(h, uint32(v))
		case uint32:
			h = fnvUint32(h, v)
		case int64:
			h = fnvUint64(h, uint64(v))
		case uint64:
			h = fnvUint64(h, v)
		case uintptr:
			h = fnvUint64(h, uint64(v))
		case []string:
			for _, x := range v {
				h = fnvString(h, x)
			}
		case []byte:
			for _, x := range v {
				h = fnv(h, x)
			}
		case []int:
			for _, x := range v {
				h = fnvUint64(h, uint64(x))
			}
		case []uint:
			for _, x := range v {
				h = fnvUint64(h, uint64(x))
			}
		case []int32:
			for _, x := range v {
				h = fnvUint32(h, uint32(x))
			}
		case []uint32:
			for _, x := range v {
				h = fnvUint32(h, x)
			}
		case []int64:
			for _, x := range v {
				h = fnvUint64(h, uint64(x))
			}
		case []uint64:
			for _, x := range v {
				h = fnvUint64(h, x)
			}
		case []uintptr:
			for _, x := range v {
				h = fnvUint64(h, uint64(x))
			}
		}
	}
	return h
}
// parseError is a trivial error implementation,
// defined here to avoid importing errors.
type parseError struct{ text string }

// Error returns the fixed message the parseError was created with.
func (e *parseError) Error() string { return e.text }
// FNV-1a implementation. See Go's hash/fnv/fnv.go.
// Copied here for simplicity (can handle integers more directly)
// and to avoid importing hash/fnv.
const (
	offset64 uint64 = 14695981039346656037 // FNV-1a 64-bit offset basis
	prime64  uint64 = 1099511628211        // FNV-1a 64-bit prime
)
// fnv performs one FNV-1a step, mixing the byte x into the hash h.
func fnv(h uint64, x byte) uint64 {
	return (h ^ uint64(x)) * prime64
}
// fnvString mixes each byte of x into the hash h using FNV-1a.
func fnvString(h uint64, x string) uint64 {
	for _, b := range []byte(x) {
		h = (h ^ uint64(b)) * prime64
	}
	return h
}
// fnvUint64 mixes the eight bytes of x, least significant first,
// into the hash h using FNV-1a.
func fnvUint64(h uint64, x uint64) uint64 {
	for shift := 0; shift < 64; shift += 8 {
		h = (h ^ ((x >> uint(shift)) & 0xFF)) * prime64
	}
	return h
}
// fnvUint32 mixes the four bytes of x, least significant first,
// into the hash h using FNV-1a.
func fnvUint32(h uint64, x uint32) uint64 {
	for shift := 0; shift < 32; shift += 8 {
		h = (h ^ uint64((x>>uint(shift))&0xFF)) * prime64
	}
	return h
}
// A dedup is a deduplicator for call stacks, so that we only print
// a report for new call stacks, not for call stacks we've already
// reported.
//
// It has two modes: an approximate but lock-free mode that
// may still emit some duplicates, and a precise mode that uses
// a lock and never emits duplicates.
type dedup struct {
	// recent is a 128-entry, 4-way set-associative, lossy cache
	// of recently seen hashes, used by seenLossy.
	recent [128][4]uint64

	// m is the complete history of seen hashes, used by seen
	// and guarded by mu.
	mu sync.Mutex
	m  map[uint64]bool
}
// seen records that h has now been seen and reports whether it was seen before.
// When seen returns false, the caller is expected to print a report for h.
func (d *dedup) seen(h uint64) bool {
	d.mu.Lock()
	defer d.mu.Unlock()
	// Lazily allocate the history map on first use.
	if d.m == nil {
		d.m = make(map[uint64]bool)
	}
	prev := d.m[h]
	d.m[h] = true
	return prev
}
// seenLossy is a variant of seen that avoids a lock by using a cache of recently seen hashes.
// Each cache entry is N-way set-associative: h can appear in any of the slots.
// If h does not appear in any of them, then it is inserted into a random slot,
// overwriting whatever was there before.
func (d *dedup) seenLossy(h uint64) bool {
	cache := &d.recent[uint(h)%uint(len(d.recent))]
	for i := 0; i < len(cache); i++ {
		if atomic.LoadUint64(&cache[i]) == h {
			return true
		}
	}

	// Compute index in set to evict as hash of current set.
	// Deterministic but effectively random, so no separate
	// randomness source is needed.
	ch := offset64
	for _, x := range cache {
		ch = fnvUint64(ch, x)
	}
	atomic.StoreUint64(&cache[uint(ch)%uint(len(cache))], h)
	return false
}

View File

@@ -0,0 +1,414 @@
// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package buildcfg provides access to the build configuration
// described by the current environment. It is for use by build tools
// such as cmd/go or cmd/compile and for setting up go/build's Default context.
//
// Note that it does NOT provide access to the build configuration used to
// build the currently-running binary. For that, use runtime.GOOS etc
// as well as internal/goexperiment.
package buildcfg
import (
"fmt"
"os"
"path/filepath"
"strconv"
"strings"
)
// The build configuration, read once from the environment.
// Each setting falls back to the corresponding compiled-in default*
// value when the environment variable is unset.
var (
	GOROOT    = os.Getenv("GOROOT") // cached for efficiency
	GOARCH    = envOr("GOARCH", defaultGOARCH)
	GOOS      = envOr("GOOS", defaultGOOS)
	GO386     = envOr("GO386", defaultGO386)
	GOAMD64   = goamd64()
	GOARM     = goarm()
	GOARM64   = goarm64()
	GOMIPS    = gomips()
	GOMIPS64  = gomips64()
	GOPPC64   = goppc64()
	GORISCV64 = goriscv64()
	GOWASM    = gowasm()
	ToolTags  = toolTags()
	GO_LDSO   = defaultGO_LDSO
	Version   = version
)
// Error is one of the errors found (if any) in the build configuration.
// The parsing functions in this package record problems here instead of
// returning them.
var Error error
// Check exits the program with a fatal error if Error is non-nil.
func Check() {
	if Error == nil {
		return
	}
	fmt.Fprintf(os.Stderr, "%s: %v\n", filepath.Base(os.Args[0]), Error)
	os.Exit(2)
}
// envOr returns the environment variable key if it is set and
// non-empty, and the fallback value otherwise.
func envOr(key, value string) string {
	v := os.Getenv(key)
	if v == "" {
		return value
	}
	return v
}
// goamd64 parses $GOAMD64 (default defaultGOAMD64) into its
// microarchitecture level, 1 through 4. On an invalid setting it
// records the problem in Error and returns the compiled-in default.
func goamd64() int {
	v := envOr("GOAMD64", defaultGOAMD64)
	if len(v) == 2 && v[0] == 'v' && '1' <= v[1] && v[1] <= '4' {
		return int(v[1] - '0')
	}
	Error = fmt.Errorf("invalid GOAMD64: must be v1, v2, v3, v4")
	return int(defaultGOAMD64[len("v")] - '0')
}
// goarmFeatures is the parsed form of the GOARM setting:
// an architecture version plus the floating-point mode.
type goarmFeatures struct {
	Version   int  // 5, 6, or 7
	SoftFloat bool // true for software floating point
}
// String formats the setting as "<version>,softfloat" or
// "<version>,hardfloat".
func (g goarmFeatures) String() string {
	s := strconv.Itoa(g.Version)
	if g.SoftFloat {
		return s + ",softfloat"
	}
	return s + ",hardfloat"
}
// goarm parses $GOARM (default defaultGOARM, or "7" on android/arm)
// into a version plus float mode. An optional ",softfloat" or
// ",hardfloat" suffix selects the float mode explicitly; otherwise
// version 5 defaults to softfloat and 6/7 to hardfloat.
// Invalid settings are recorded in Error.
func goarm() (g goarmFeatures) {
	const (
		softFloatOpt = ",softfloat"
		hardFloatOpt = ",hardfloat"
	)
	def := defaultGOARM
	if GOOS == "android" && GOARCH == "arm" {
		// Android arm devices always support GOARM=7.
		def = "7"
	}
	v := envOr("GOARM", def)

	// Strip at most one float-mode suffix, remembering that it was given.
	floatSpecified := false
	if strings.HasSuffix(v, softFloatOpt) {
		g.SoftFloat = true
		floatSpecified = true
		v = v[:len(v)-len(softFloatOpt)]
	}
	if strings.HasSuffix(v, hardFloatOpt) {
		floatSpecified = true
		v = v[:len(v)-len(hardFloatOpt)]
	}

	switch v {
	case "5":
		g.Version = 5
	case "6":
		g.Version = 6
	case "7":
		g.Version = 7
	default:
		Error = fmt.Errorf("invalid GOARM: must start with 5, 6, or 7, and may optionally end in either %q or %q", hardFloatOpt, softFloatOpt)
		// Fall back to the default's leading digit.
		g.Version = int(def[0] - '0')
	}

	// 5 defaults to softfloat. 6 and 7 default to hardfloat.
	if !floatSpecified && g.Version == 5 {
		g.SoftFloat = true
	}
	return
}
// Goarm64Features is the parsed form of the GOARM64 setting:
// an architecture version string plus optional extensions.
type Goarm64Features struct {
	// Version is the architecture version, "v8.0" through "v9.5".
	Version string
	// LSE is the Large Systems Extension.
	LSE bool
	// Crypto is the ARM v8.0 Cryptographic Extension. It includes the following features:
	// * FEAT_AES, which includes the AESD and AESE instructions.
	// * FEAT_PMULL, which includes the PMULL, PMULL2 instructions.
	// * FEAT_SHA1, which includes the SHA1* instructions.
	// * FEAT_SHA256, which includes the SHA256* instructions.
	Crypto bool
}
// String formats the setting as the version followed by ",lse"
// and/or ",crypto" for any enabled extensions.
func (g Goarm64Features) String() string {
	s := g.Version
	if g.LSE {
		s += ",lse"
	}
	if g.Crypto {
		s += ",crypto"
	}
	return s
}
// ParseGoarm64 parses a GOARM64 value of the form
// "v<major>.<minor>" with optional ",lse" and/or ",crypto"
// suffixes in any order. Versions v8.1 and later imply LSE.
// On failure it returns a non-nil error and the default version.
func ParseGoarm64(v string) (g Goarm64Features, e error) {
	const (
		lseOpt    = ",lse"
		cryptoOpt = ",crypto"
	)

	g.LSE = false
	g.Crypto = false
	// We allow any combination of suffixes, in any order
	for {
		if strings.HasSuffix(v, lseOpt) {
			g.LSE = true
			v = v[:len(v)-len(lseOpt)]
			continue
		}

		if strings.HasSuffix(v, cryptoOpt) {
			g.Crypto = true
			v = v[:len(v)-len(cryptoOpt)]
			continue
		}

		break
	}

	switch v {
	case "v8.0":
		g.Version = v
	case "v8.1", "v8.2", "v8.3", "v8.4", "v8.5", "v8.6", "v8.7", "v8.8", "v8.9",
		"v9.0", "v9.1", "v9.2", "v9.3", "v9.4", "v9.5":
		g.Version = v

		// LSE extension is mandatory starting from 8.1
		g.LSE = true
	default:
		e = fmt.Errorf("invalid GOARM64: must start with v8.{0-9} or v9.{0-5} and may optionally end in %q and/or %q",
			lseOpt, cryptoOpt)
		g.Version = defaultGOARM64
	}

	return
}
// goarm64 parses $GOARM64 (default defaultGOARM64), recording any
// parse failure in the package-level Error.
func goarm64() (g Goarm64Features) {
	g, Error = ParseGoarm64(envOr("GOARM64", defaultGOARM64))
	return
}
// Supports reports whether g's configured version includes the given
// ARM64 ISA version s.
// Note that this function doesn't accept / test suffixes (like ",lse" or ",crypto"):
// s must be exactly "v{8-9}.{0-9}"; everything else is malformed.
func (g Goarm64Features) Supports(s string) bool {
	if len(s) != 4 || s[0] != 'v' || s[2] != '.' {
		return false
	}
	major, minor := s[1], s[3]
	if major < '8' || major > '9' || minor < '0' || minor > '9' {
		return false
	}

	gmajor, gminor := g.Version[1], g.Version[3]
	switch {
	case major == gmajor:
		return minor <= gminor
	case gmajor == '9':
		// v9.0 diverged from v8.5. This means we should compare with
		// gminor increased by five.
		return minor <= gminor+5
	default:
		return false
	}
}
// gomips parses $GOMIPS (default defaultGOMIPS). Only "hardfloat"
// and "softfloat" are accepted; anything else records a problem in
// Error and returns the compiled-in default.
func gomips() string {
	v := envOr("GOMIPS", defaultGOMIPS)
	if v == "hardfloat" || v == "softfloat" {
		return v
	}
	Error = fmt.Errorf("invalid GOMIPS: must be hardfloat, softfloat")
	return defaultGOMIPS
}
// gomips64 parses $GOMIPS64 (default defaultGOMIPS64). Only
// "hardfloat" and "softfloat" are accepted; anything else records a
// problem in Error and returns the compiled-in default.
func gomips64() string {
	v := envOr("GOMIPS64", defaultGOMIPS64)
	if v == "hardfloat" || v == "softfloat" {
		return v
	}
	Error = fmt.Errorf("invalid GOMIPS64: must be hardfloat, softfloat")
	return defaultGOMIPS64
}
// goppc64 parses $GOPPC64 (default defaultGOPPC64) into a power
// level: 8, 9, or 10. Invalid settings are recorded in Error and
// the compiled-in default is returned.
func goppc64() int {
	v := envOr("GOPPC64", defaultGOPPC64)
	for n := 8; n <= 10; n++ {
		if v == fmt.Sprintf("power%d", n) {
			return n
		}
	}
	Error = fmt.Errorf("invalid GOPPC64: must be power8, power9, power10")
	return int(defaultGOPPC64[len("power")] - '0')
}
// goriscv64 parses $GORISCV64 (default defaultGORISCV64) into its
// profile year: 20 for rva20u64, 22 for rva22u64.
// Invalid settings are recorded in Error and the compiled-in
// default's year is returned.
func goriscv64() int {
	switch v := envOr("GORISCV64", defaultGORISCV64); v {
	case "rva20u64":
		return 20
	case "rva22u64":
		return 22
	}
	Error = fmt.Errorf("invalid GORISCV64: must be rva20u64, rva22u64")
	// Extract the year from the default, i.e. the digits that follow
	// "rva" (e.g. "rva22u64" -> 22).
	v := defaultGORISCV64[len("rva"):]
	i := strings.IndexFunc(v, func(r rune) bool {
		return r < '0' || r > '9'
	})
	year, _ := strconv.Atoi(v[:i])
	return year
}
// gowasmFeatures is the parsed form of the GOWASM feature list.
type gowasmFeatures struct {
	SatConv bool // saturating float-to-int conversions
	SignExt bool // sign-extension operators
}
// String formats the enabled features ("satconv", "signext") as a
// comma-separated list, empty when none are enabled.
func (f gowasmFeatures) String() string {
	s := ""
	if f.SatConv {
		s = "satconv"
	}
	if f.SignExt {
		if s != "" {
			s += ","
		}
		s += "signext"
	}
	return s
}
// gowasm parses the comma-separated $GOWASM feature list.
// Recognized features are "satconv" and "signext"; empty entries are
// ignored, and unknown names are recorded in Error.
func gowasm() (f gowasmFeatures) {
	for _, opt := range strings.Split(envOr("GOWASM", ""), ",") {
		switch opt {
		case "satconv":
			f.SatConv = true
		case "signext":
			f.SignExt = true
		case "":
			// ignore
		default:
			Error = fmt.Errorf("invalid GOWASM: no such feature %q", opt)
		}
	}
	return
}
// Getgoextlinkenabled returns the $GO_EXTLINK_ENABLED setting,
// falling back to the compiled-in default.
func Getgoextlinkenabled() string {
	return envOr("GO_EXTLINK_ENABLED", defaultGO_EXTLINK_ENABLED)
}
// toolTags returns the implicit build tags set by the toolchain
// configuration: one tag per enabled experiment followed by the
// GOARCH feature tags.
func toolTags() []string {
	return append(experimentTags(), gogoarchTags()...)
}
// experimentTags returns one "goexperiment.<name>" build tag for
// each experiment enabled in Experiment.
func experimentTags() []string {
	var list []string
	// For each experiment that has been enabled in the toolchain, define a
	// build tag with the same name but prefixed by "goexperiment." which can be
	// used for compiling alternative files for the experiment. This allows
	// changes for the experiment, like extra struct fields in the runtime,
	// without affecting the base non-experiment code at all.
	for _, exp := range Experiment.Enabled() {
		list = append(list, "goexperiment."+exp)
	}
	return list
}
// GOGOARCH returns the name and value of the GO$GOARCH setting.
// For example, if GOARCH is "amd64" it might return "GOAMD64", "v2".
// For architectures with no sub-architecture setting it returns "", "".
func GOGOARCH() (name, value string) {
	switch GOARCH {
	case "386":
		return "GO386", GO386
	case "amd64":
		return "GOAMD64", fmt.Sprintf("v%d", GOAMD64)
	case "arm":
		return "GOARM", GOARM.String()
	case "arm64":
		return "GOARM64", GOARM64.String()
	case "mips", "mipsle":
		return "GOMIPS", GOMIPS
	case "mips64", "mips64le":
		return "GOMIPS64", GOMIPS64
	case "ppc64", "ppc64le":
		return "GOPPC64", fmt.Sprintf("power%d", GOPPC64)
	case "wasm":
		return "GOWASM", GOWASM.String()
	}
	return "", ""
}
// gogoarchTags returns the architecture feature build tags implied by
// the GO$GOARCH setting, e.g. "amd64.v1", "amd64.v2" for GOAMD64=v2.
// For leveled settings every level up to and including the configured
// one gets a tag, so files can select "at least level N".
func gogoarchTags() []string {
	switch GOARCH {
	case "386":
		return []string{GOARCH + "." + GO386}
	case "amd64":
		var list []string
		for i := 1; i <= GOAMD64; i++ {
			list = append(list, fmt.Sprintf("%s.v%d", GOARCH, i))
		}
		return list
	case "arm":
		var list []string
		for i := 5; i <= GOARM.Version; i++ {
			list = append(list, fmt.Sprintf("%s.%d", GOARCH, i))
		}
		return list
	case "arm64":
		var list []string
		major := int(GOARM64.Version[1] - '0')
		minor := int(GOARM64.Version[3] - '0')
		// One tag per minor version up to the configured one.
		for i := 0; i <= minor; i++ {
			list = append(list, fmt.Sprintf("%s.v%d.%d", GOARCH, major, i))
		}
		// ARM64 v9.x also includes support of v8.x+5 (i.e. v9.1 includes v8.(1+5) = v8.6).
		if major == 9 {
			for i := 0; i <= minor+5 && i <= 9; i++ {
				list = append(list, fmt.Sprintf("%s.v%d.%d", GOARCH, 8, i))
			}
		}
		return list
	case "mips", "mipsle":
		return []string{GOARCH + "." + GOMIPS}
	case "mips64", "mips64le":
		return []string{GOARCH + "." + GOMIPS64}
	case "ppc64", "ppc64le":
		var list []string
		for i := 8; i <= GOPPC64; i++ {
			list = append(list, fmt.Sprintf("%s.power%d", GOARCH, i))
		}
		return list
	case "riscv64":
		// rva20u64 is the baseline; rva22u64 additionally implies it.
		list := []string{GOARCH + "." + "rva20u64"}
		if GORISCV64 >= 22 {
			list = append(list, GOARCH+"."+"rva22u64")
		}
		return list
	case "wasm":
		var list []string
		if GOWASM.SatConv {
			list = append(list, GOARCH+".satconv")
		}
		if GOWASM.SignExt {
			list = append(list, GOARCH+".signext")
		}
		return list
	}
	return nil
}

View File

@@ -0,0 +1,125 @@
// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package buildcfg
import (
"os"
"testing"
)
// TestConfigFlags exercises the environment-variable parsers
// (goamd64, goriscv64, goarm64), checking both accepted values and
// that invalid values set the package-level Error.
// Error must be reset to nil before each negative case because the
// parsers only ever write it, never clear it.
func TestConfigFlags(t *testing.T) {
	os.Setenv("GOAMD64", "v1")
	if goamd64() != 1 {
		t.Errorf("Wrong parsing of GOAMD64=v1")
	}
	os.Setenv("GOAMD64", "v4")
	if goamd64() != 4 {
		t.Errorf("Wrong parsing of GOAMD64=v4")
	}
	Error = nil
	os.Setenv("GOAMD64", "1")
	if goamd64(); Error == nil {
		t.Errorf("Wrong parsing of GOAMD64=1")
	}

	os.Setenv("GORISCV64", "rva20u64")
	if goriscv64() != 20 {
		t.Errorf("Wrong parsing of RISCV64=rva20u64")
	}
	os.Setenv("GORISCV64", "rva22u64")
	if goriscv64() != 22 {
		t.Errorf("Wrong parsing of RISCV64=rva22u64")
	}
	Error = nil
	os.Setenv("GORISCV64", "rva22")
	if _ = goriscv64(); Error == nil {
		t.Errorf("Wrong parsing of RISCV64=rva22")
	}

	Error = nil
	os.Setenv("GOARM64", "v7.0")
	if _ = goarm64(); Error == nil {
		t.Errorf("Wrong parsing of GOARM64=7.0")
	}
	Error = nil
	os.Setenv("GOARM64", "8.0")
	if _ = goarm64(); Error == nil {
		t.Errorf("Wrong parsing of GOARM64=8.0")
	}
	Error = nil
	os.Setenv("GOARM64", "v8.0,lsb")
	if _ = goarm64(); Error == nil {
		t.Errorf("Wrong parsing of GOARM64=v8.0,lsb")
	}
	os.Setenv("GOARM64", "v8.0,lse")
	if goarm64().Version != "v8.0" || goarm64().LSE != true || goarm64().Crypto != false {
		t.Errorf("Wrong parsing of GOARM64=v8.0,lse")
	}
	os.Setenv("GOARM64", "v8.0,crypto")
	if goarm64().Version != "v8.0" || goarm64().LSE != false || goarm64().Crypto != true {
		t.Errorf("Wrong parsing of GOARM64=v8.0,crypto")
	}
	os.Setenv("GOARM64", "v8.0,crypto,lse")
	if goarm64().Version != "v8.0" || goarm64().LSE != true || goarm64().Crypto != true {
		t.Errorf("Wrong parsing of GOARM64=v8.0,crypto,lse")
	}
	os.Setenv("GOARM64", "v8.0,lse,crypto")
	if goarm64().Version != "v8.0" || goarm64().LSE != true || goarm64().Crypto != true {
		t.Errorf("Wrong parsing of GOARM64=v8.0,lse,crypto")
	}
	// v9.0 and later imply LSE even without the suffix.
	os.Setenv("GOARM64", "v9.0")
	if goarm64().Version != "v9.0" || goarm64().LSE != true || goarm64().Crypto != false {
		t.Errorf("Wrong parsing of GOARM64=v9.0")
	}
}
// TestGoarm64FeaturesSupports checks Goarm64Features.Supports against
// a v9.3 configuration, including the v9.x -> v8.(x+5) equivalence
// and rejection of malformed inputs.
func TestGoarm64FeaturesSupports(t *testing.T) {
	g, _ := ParseGoarm64("v9.3")

	if !g.Supports("v9.3") {
		t.Errorf("Wrong goarm64Features.Supports for v9.3, v9.3")
	}

	if g.Supports("v9.4") {
		t.Errorf("Wrong goarm64Features.Supports for v9.3, v9.4")
	}

	// v9.3 includes v8.8 (= v8.(3+5)) but not v8.9.
	if !g.Supports("v8.8") {
		t.Errorf("Wrong goarm64Features.Supports for v9.3, v8.8")
	}

	if g.Supports("v8.9") {
		t.Errorf("Wrong goarm64Features.Supports for v9.3, v8.9")
	}

	if g.Supports(",lse") {
		t.Errorf("Wrong goarm64Features.Supports for v9.3, ,lse")
	}
}
// TestGogoarchTags checks the arm64 feature tags generated for
// GOARM64=v9.5: all v9 minor levels plus the implied v8 levels.
// The package-level GOARCH/GOARM64 are swapped in and restored so the
// test does not disturb other tests.
func TestGogoarchTags(t *testing.T) {
	old_goarch := GOARCH
	old_goarm64 := GOARM64

	GOARCH = "arm64"

	os.Setenv("GOARM64", "v9.5")
	GOARM64 = goarm64()
	tags := gogoarchTags()
	want := []string{"arm64.v9.0", "arm64.v9.1", "arm64.v9.2", "arm64.v9.3", "arm64.v9.4", "arm64.v9.5",
		"arm64.v8.0", "arm64.v8.1", "arm64.v8.2", "arm64.v8.3", "arm64.v8.4", "arm64.v8.5", "arm64.v8.6", "arm64.v8.7", "arm64.v8.8", "arm64.v8.9"}
	if len(tags) != len(want) {
		t.Errorf("Wrong number of tags for GOARM64=v9.5")
	} else {
		for i, v := range tags {
			if v != want[i] {
				t.Error("Wrong tags for GOARM64=v9.5")
				break
			}
		}
	}

	GOARCH = old_goarch
	GOARM64 = old_goarm64
}

View File

@@ -0,0 +1,190 @@
// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package buildcfg
import (
"fmt"
"reflect"
"strings"
"internal/goexperiment"
)
// ExperimentFlags represents a set of GOEXPERIMENT flags relative to a baseline
// (platform-default) experiment configuration.
type ExperimentFlags struct {
	goexperiment.Flags
	// baseline is the platform-default configuration; flags equal to
	// their baseline value are elided from String output.
	baseline goexperiment.Flags
}
// Experiment contains the toolchain experiments enabled for the
// current build.
//
// (This is not necessarily the set of experiments the compiler itself
// was built with.)
//
// experimentBaseline specifies the experiment flags that are enabled by
// default in the current toolchain. This is, in effect, the "control"
// configuration and any variation from this is an experiment.
var Experiment ExperimentFlags = func() ExperimentFlags {
	flags, err := ParseGOEXPERIMENT(GOOS, GOARCH, envOr("GOEXPERIMENT", defaultGOEXPERIMENT))
	if err != nil {
		// Record the problem in Error and fall back to an empty set.
		Error = err
		return ExperimentFlags{}
	}
	return *flags
}()
// DefaultGOEXPERIMENT is the embedded default GOEXPERIMENT string.
// It is not guaranteed to be canonical.
const DefaultGOEXPERIMENT = defaultGOEXPERIMENT

// FramePointerEnabled enables the use of platform conventions for
// saving frame pointers.
//
// This used to be an experiment, but now it's always enabled on
// platforms that support it.
//
// Note: must agree with runtime.framepointer_enabled.
var FramePointerEnabled = GOARCH == "amd64" || GOARCH == "arm64"
// ParseGOEXPERIMENT parses a (GOOS, GOARCH, GOEXPERIMENT)
// configuration tuple and returns the enabled and baseline experiment
// flag sets.
//
// The GOEXPERIMENT string is a comma-separated list of experiment
// names, each optionally prefixed by "no" to disable it; "none"
// clears every flag.
//
// TODO(mdempsky): Move to internal/goexperiment.
func ParseGOEXPERIMENT(goos, goarch, goexp string) (*ExperimentFlags, error) {
	// regabiSupported is set to true on platforms where register ABI is
	// supported and enabled by default.
	// regabiAlwaysOn is set to true on platforms where register ABI is
	// always on.
	var regabiSupported, regabiAlwaysOn bool
	switch goarch {
	case "amd64", "arm64", "loong64", "ppc64le", "ppc64", "riscv64":
		regabiAlwaysOn = true
		regabiSupported = true
	}

	baseline := goexperiment.Flags{
		RegabiWrappers:   regabiSupported,
		RegabiArgs:       regabiSupported,
		CoverageRedesign: true,
	}

	// Start with the statically enabled set of experiments.
	flags := &ExperimentFlags{
		Flags:    baseline,
		baseline: baseline,
	}

	// Pick up any changes to the baseline configuration from the
	// GOEXPERIMENT environment. This can be set at make.bash time
	// and overridden at build time.
	if goexp != "" {
		// Create a map of known experiment names, using reflection over
		// the goexperiment.Flags struct fields (lower-cased).
		names := make(map[string]func(bool))
		rv := reflect.ValueOf(&flags.Flags).Elem()
		rt := rv.Type()
		for i := 0; i < rt.NumField(); i++ {
			field := rv.Field(i)
			names[strings.ToLower(rt.Field(i).Name)] = field.SetBool
		}

		// "regabi" is an alias for all working regabi
		// subexperiments, and not an experiment itself. Doing
		// this as an alias make both "regabi" and "noregabi"
		// do the right thing.
		names["regabi"] = func(v bool) {
			flags.RegabiWrappers = v
			flags.RegabiArgs = v
		}

		// Parse names.
		for _, f := range strings.Split(goexp, ",") {
			if f == "" {
				continue
			}
			if f == "none" {
				// GOEXPERIMENT=none disables all experiment flags.
				// This is used by cmd/dist, which doesn't know how
				// to build with any experiment flags.
				flags.Flags = goexperiment.Flags{}
				continue
			}
			val := true
			if strings.HasPrefix(f, "no") {
				f, val = f[2:], false
			}
			set, ok := names[f]
			if !ok {
				return nil, fmt.Errorf("unknown GOEXPERIMENT %s", f)
			}
			set(val)
		}
	}

	if regabiAlwaysOn {
		flags.RegabiWrappers = true
		flags.RegabiArgs = true
	}
	// regabi is only supported on amd64, arm64, loong64, riscv64, ppc64 and ppc64le.
	if !regabiSupported {
		flags.RegabiWrappers = false
		flags.RegabiArgs = false
	}

	// Check regabi dependencies.
	if flags.RegabiArgs && !flags.RegabiWrappers {
		return nil, fmt.Errorf("GOEXPERIMENT regabiargs requires regabiwrappers")
	}
	return flags, nil
}
// String returns the canonical GOEXPERIMENT string to enable this experiment
// configuration. (Experiments in the same state as in the baseline are elided.)
func (exp *ExperimentFlags) String() string {
	return strings.Join(expList(&exp.Flags, &exp.baseline, false), ",")
}
// expList returns the list of lower-cased experiment names for
// experiments that differ from base. base may be nil to indicate no
// experiments. If all is true, then include all experiment flags,
// regardless of base.
// Disabled experiments are reported with a "no" prefix.
func expList(exp, base *goexperiment.Flags, all bool) []string {
	var list []string
	rv := reflect.ValueOf(exp).Elem()
	var rBase reflect.Value
	if base != nil {
		rBase = reflect.ValueOf(base).Elem()
	}
	rt := rv.Type()
	for i := 0; i < rt.NumField(); i++ {
		name := strings.ToLower(rt.Field(i).Name)
		val := rv.Field(i).Bool()
		baseVal := false
		if base != nil {
			baseVal = rBase.Field(i).Bool()
		}
		if all || val != baseVal {
			if val {
				list = append(list, name)
			} else {
				list = append(list, "no"+name)
			}
		}
	}
	return list
}
// Enabled returns a list of enabled experiments, as
// lower-cased experiment names.
func (exp *ExperimentFlags) Enabled() []string {
	return expList(&exp.Flags, nil, false)
}

// All returns a list of all experiment settings.
// Disabled experiments appear in the list prefixed by "no".
func (exp *ExperimentFlags) All() []string {
	return expList(&exp.Flags, nil, true)
}

View File

@@ -0,0 +1,118 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package bytealg
import (
"internal/cpu"
"unsafe"
)
// Offsets into internal/cpu records for use in assembly.
// The assembly files reference these as const_<name> via go_asm.h.
const (
	offsetX86HasSSE42  = unsafe.Offsetof(cpu.X86.HasSSE42)
	offsetX86HasAVX2   = unsafe.Offsetof(cpu.X86.HasAVX2)
	offsetX86HasPOPCNT = unsafe.Offsetof(cpu.X86.HasPOPCNT)

	offsetS390xHasVX = unsafe.Offsetof(cpu.S390X.HasVX)

	offsetPPC64HasPOWER9 = unsafe.Offsetof(cpu.PPC64.IsPOWER9)
)
// MaxLen is the maximum length of the string to be searched for (argument b) in Index.
// If MaxLen is not 0, make sure MaxLen >= 4.
var MaxLen int

// PrimeRK is the prime base used in Rabin-Karp algorithm.
const PrimeRK = 16777619
// HashStr returns the hash and the appropriate multiplicative
// factor for use in Rabin-Karp algorithm.
// The factor is PrimeRK**len(sep) (mod 2**32), used to remove the
// outgoing byte from a rolling hash.
func HashStr[T string | []byte](sep T) (uint32, uint32) {
	var hash uint32
	for j := 0; j < len(sep); j++ {
		hash = hash*PrimeRK + uint32(sep[j])
	}
	// Compute PrimeRK**len(sep) by binary exponentiation.
	pow, sq := uint32(1), uint32(PrimeRK)
	for bits := len(sep); bits > 0; bits >>= 1 {
		if bits&1 != 0 {
			pow *= sq
		}
		sq *= sq
	}
	return hash, pow
}
// HashStrRev returns the hash of the reverse of sep and the
// appropriate multiplicative factor for use in Rabin-Karp algorithm.
// The factor is PrimeRK**len(sep) (mod 2**32).
func HashStrRev[T string | []byte](sep T) (uint32, uint32) {
	var hash uint32
	for j := len(sep) - 1; j >= 0; j-- {
		hash = hash*PrimeRK + uint32(sep[j])
	}
	// Compute PrimeRK**len(sep) by binary exponentiation.
	pow, sq := uint32(1), uint32(PrimeRK)
	for bits := len(sep); bits > 0; bits >>= 1 {
		if bits&1 != 0 {
			pow *= sq
		}
		sq *= sq
	}
	return hash, pow
}
// IndexRabinKarp uses the Rabin-Karp search algorithm to return the index of the
// first occurrence of sep in s, or -1 if not present.
// Callers must ensure len(sep) <= len(s).
func IndexRabinKarp[T string | []byte](s, sep T) int {
	// Rabin-Karp search
	hashss, pow := HashStr(sep)
	n := len(sep)
	// Hash of the first n bytes of s.
	var h uint32
	for i := 0; i < n; i++ {
		h = h*PrimeRK + uint32(s[i])
	}
	if h == hashss && string(s[:n]) == string(sep) {
		return 0
	}
	// Roll the hash forward one byte at a time: mix in the incoming
	// byte and remove the outgoing one (scaled by pow).
	for i := n; i < len(s); {
		h *= PrimeRK
		h += uint32(s[i])
		h -= pow * uint32(s[i-n])
		i++
		// Hash match is only a candidate; confirm with a direct compare.
		if h == hashss && string(s[i-n:i]) == string(sep) {
			return i - n
		}
	}
	return -1
}
// LastIndexRabinKarp uses the Rabin-Karp search algorithm to return the last index of the
// occurrence of sep in s, or -1 if not present.
// Callers must ensure len(sep) <= len(s).
func LastIndexRabinKarp[T string | []byte](s, sep T) int {
	// Rabin-Karp search from the end of the string
	hashss, pow := HashStrRev(sep)
	n := len(sep)
	last := len(s) - n
	// Hash of the final n bytes of s, accumulated right to left.
	var h uint32
	for i := len(s) - 1; i >= last; i-- {
		h = h*PrimeRK + uint32(s[i])
	}
	if h == hashss && string(s[last:]) == string(sep) {
		return last
	}
	// Roll the hash backward one byte at a time.
	for i := last - 1; i >= 0; i-- {
		h *= PrimeRK
		h += uint32(s[i])
		h -= pow * uint32(s[i+n])
		// Hash match is only a candidate; confirm with a direct compare.
		if h == hashss && string(s[i:i+n]) == string(sep) {
			return i
		}
	}
	return -1
}
// MakeNoZero makes a slice of length n and capacity of at least n Bytes
// without zeroing the bytes (including the bytes between len and cap).
// It is the caller's responsibility to ensure uninitialized bytes
// do not leak to the end user.
// It has no Go body here; the implementation is provided outside this file.
func MakeNoZero(n int) []byte

View File

@@ -0,0 +1,144 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "go_asm.h"
#include "textflag.h"
// Compare(a, b []byte) int
// Loads both slices' base and length, points AX at the result word,
// and tail-jumps to the shared comparison body.
TEXT ·Compare(SB),NOSPLIT,$0-28
	MOVL	a_base+0(FP), SI
	MOVL	a_len+4(FP), BX
	MOVL	b_base+12(FP), DI
	MOVL	b_len+16(FP), DX
	LEAL	ret+24(FP), AX
	JMP	cmpbody<>(SB)
// runtime·cmpstring(a, b string) int
// Same as Compare but for strings, which have no cap word,
// so b starts at offset 8 instead of 12.
TEXT runtime·cmpstring(SB),NOSPLIT,$0-20
	MOVL	a_base+0(FP), SI
	MOVL	a_len+4(FP), BX
	MOVL	b_base+8(FP), DI
	MOVL	b_len+12(FP), DX
	LEAL	ret+16(FP), AX
	JMP	cmpbody<>(SB)
// input:
//   SI = a
//   DI = b
//   BX = alen
//   DX = blen
//   AX = address of return word (set to 1/0/-1)
TEXT cmpbody<>(SB),NOSPLIT,$0-0
	MOVL	DX, BP
	SUBL	BX, DX // DX = blen-alen
	JLE	2(PC)
	MOVL	BX, BP // BP = min(alen, blen)
	CMPL	SI, DI
	JEQ	allsame
	CMPL	BP, $4
	JB	small
#ifdef GO386_softfloat
	// SSE is unavailable in softfloat mode; use the integer loop only.
	JMP	mediumloop
#endif
largeloop:
	// Compare 16 bytes per iteration with SSE2.
	CMPL	BP, $16
	JB	mediumloop
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	PCMPEQB	X0, X1
	PMOVMSKB X1, BX
	XORL	$0xffff, BX	// convert EQ to NE
	JNE	diff16	// branch if at least one byte is not equal
	ADDL	$16, SI
	ADDL	$16, DI
	SUBL	$16, BP
	JMP	largeloop

diff16:
	BSFL	BX, BX	// index of first byte that differs
	XORL	DX, DX
	MOVB	(SI)(BX*1), CX
	CMPB	CX, (DI)(BX*1)
	SETHI	DX
	LEAL	-1(DX*2), DX	// convert 1/0 to +1/-1
	MOVL	DX, (AX)
	RET

mediumloop:
	// Compare 4 bytes per iteration using integer registers.
	CMPL	BP, $4
	JBE	_0through4
	MOVL	(SI), BX
	MOVL	(DI), CX
	CMPL	BX, CX
	JNE	diff4
	ADDL	$4, SI
	ADDL	$4, DI
	SUBL	$4, BP
	JMP	mediumloop

_0through4:
	// Load the final (possibly overlapping) 4 bytes of each input.
	MOVL	-4(SI)(BP*1), BX
	MOVL	-4(DI)(BP*1), CX
	CMPL	BX, CX
	JEQ	allsame

diff4:
	BSWAPL	BX	// reverse order of bytes
	BSWAPL	CX
	XORL	BX, CX	// find bit differences
	BSRL	CX, CX	// index of highest bit difference
	SHRL	CX, BX	// move a's bit to bottom
	ANDL	$1, BX	// mask bit
	LEAL	-1(BX*2), BX	// 1/0 => +1/-1
	MOVL	BX, (AX)
	RET

	// 0-3 bytes in common
small:
	LEAL	(BP*8), CX	// bytes left -> bits left
	NEGL	CX
	JEQ	allsame

	// load si
	// Guard against a 4-byte load crossing into an unmapped page:
	// if the low address byte is above 0xfc, read backward from the
	// end of the data instead.
	CMPB	SI, $0xfc
	JA	si_high
	MOVL	(SI), SI
	JMP	si_finish
si_high:
	MOVL	-4(SI)(BP*1), SI
	SHRL	CX, SI
si_finish:
	SHLL	CX, SI

	// same for di
	CMPB	DI, $0xfc
	JA	di_high
	MOVL	(DI), DI
	JMP	di_finish
di_high:
	MOVL	-4(DI)(BP*1), DI
	SHRL	CX, DI
di_finish:
	SHLL	CX, DI

	BSWAPL	SI	// reverse order of bytes
	BSWAPL	DI
	XORL	SI, DI	// find bit differences
	JEQ	allsame
	BSRL	DI, CX	// index of highest bit difference
	SHRL	CX, SI	// move a's bit to bottom
	ANDL	$1, SI	// mask bit
	LEAL	-1(SI*2), BX	// 1/0 => +1/-1
	MOVL	BX, (AX)
	RET

	// all the bytes in common are the same, so we just need
	// to compare the lengths.
allsame:
	XORL	BX, BX
	XORL	CX, CX
	TESTL	DX, DX
	SETLT	BX	// 1 if alen > blen
	SETEQ	CX	// 1 if alen == blen
	LEAL	-1(CX)(BX*2), BX	// 1,0,-1 result
	MOVL	BX, (AX)
	RET

View File

@@ -0,0 +1,237 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "go_asm.h"
#include "asm_amd64.h"
#include "textflag.h"
// Compare(a, b []byte) int, register ABI.
// Shuffle the incoming registers into cmpbody's expected layout.
TEXT ·Compare<ABIInternal>(SB),NOSPLIT,$0-56
	// AX = a_base (want in SI)
	// BX = a_len  (want in BX)
	// CX = a_cap  (unused)
	// DI = b_base (want in DI)
	// SI = b_len  (want in DX)
	// R8 = b_cap  (unused)
	MOVQ	SI, DX
	MOVQ	AX, SI
	JMP	cmpbody<>(SB)
// runtime·cmpstring(a, b string) int, register ABI.
// Strings carry no cap word, so only four registers arrive.
TEXT runtime·cmpstring<ABIInternal>(SB),NOSPLIT,$0-40
	// AX = a_base (want in SI)
	// BX = a_len  (want in BX)
	// CX = b_base (want in DI)
	// DI = b_len  (want in DX)
	MOVQ	AX, SI
	MOVQ	DI, DX
	MOVQ	CX, DI
	JMP	cmpbody<>(SB)
// input:
//   SI = a
//   DI = b
//   BX = alen
//   DX = blen
// output:
//   AX = output (-1/0/1)
TEXT cmpbody<>(SB),NOSPLIT,$0-0
	CMPQ	SI, DI
	JEQ	allsame
	CMPQ	BX, DX
	MOVQ	DX, R8
	CMOVQLT	BX, R8 // R8 = min(alen, blen) = # of bytes to compare
	CMPQ	R8, $8
	JB	small

	CMPQ	R8, $63
	JBE	loop
#ifndef hasAVX2
	// Runtime AVX2 dispatch. NOTE(review): the internal/cpu symbol must
	// be spelled with U+2215 ("internal∕cpu"); the plain "internalcpu"
	// that appeared here would not resolve at link time.
	CMPB	internal∕cpu·X86+const_offsetX86HasAVX2(SB), $1
	JEQ	big_loop_avx2
	JMP	big_loop
#else
	JMP	big_loop_avx2
#endif

loop:
	// Compare 16 bytes per iteration with SSE2.
	CMPQ	R8, $16
	JBE	_0through16
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	PCMPEQB	X0, X1
	PMOVMSKB X1, AX
	XORQ	$0xffff, AX	// convert EQ to NE
	JNE	diff16	// branch if at least one byte is not equal
	ADDQ	$16, SI
	ADDQ	$16, DI
	SUBQ	$16, R8
	JMP	loop

	// Entry points that rewind SI/DI to the differing 16-byte chunk
	// found by the 64-byte loops below.
diff64:
	ADDQ	$48, SI
	ADDQ	$48, DI
	JMP	diff16
diff48:
	ADDQ	$32, SI
	ADDQ	$32, DI
	JMP	diff16
diff32:
	ADDQ	$16, SI
	ADDQ	$16, DI

	// AX = bit mask of differences
diff16:
	BSFQ	AX, BX	// index of first byte that differs
	XORQ	AX, AX
	MOVB	(SI)(BX*1), CX
	CMPB	CX, (DI)(BX*1)
	SETHI	AX
	LEAQ	-1(AX*2), AX	// convert 1/0 to +1/-1
	RET

	// 0 through 16 bytes left, alen>=8, blen>=8
_0through16:
	CMPQ	R8, $8
	JBE	_0through8
	MOVQ	(SI), AX
	MOVQ	(DI), CX
	CMPQ	AX, CX
	JNE	diff8
_0through8:
	// Final (possibly overlapping) 8 bytes of each input.
	MOVQ	-8(SI)(R8*1), AX
	MOVQ	-8(DI)(R8*1), CX
	CMPQ	AX, CX
	JEQ	allsame

	// AX and CX contain parts of a and b that differ.
diff8:
	BSWAPQ	AX	// reverse order of bytes
	BSWAPQ	CX
	XORQ	AX, CX
	BSRQ	CX, CX	// index of highest bit difference
	SHRQ	CX, AX	// move a's bit to bottom
	ANDQ	$1, AX	// mask bit
	LEAQ	-1(AX*2), AX	// 1/0 => +1/-1
	RET

	// 0-7 bytes in common
small:
	LEAQ	(R8*8), CX	// bytes left -> bits left
	NEGQ	CX	// - bits lift (== 64 - bits left mod 64)
	JEQ	allsame

	// load bytes of a into high bytes of AX
	// Guard against an 8-byte load crossing into an unmapped page:
	// if the low address byte is above 0xf8, read backward from the end.
	CMPB	SI, $0xf8
	JA	si_high
	MOVQ	(SI), SI
	JMP	si_finish
si_high:
	MOVQ	-8(SI)(R8*1), SI
	SHRQ	CX, SI
si_finish:
	SHLQ	CX, SI

	// load bytes of b in to high bytes of BX
	CMPB	DI, $0xf8
	JA	di_high
	MOVQ	(DI), DI
	JMP	di_finish
di_high:
	MOVQ	-8(DI)(R8*1), DI
	SHRQ	CX, DI
di_finish:
	SHLQ	CX, DI

	BSWAPQ	SI	// reverse order of bytes
	BSWAPQ	DI
	XORQ	SI, DI	// find bit differences
	JEQ	allsame
	BSRQ	DI, CX	// index of highest bit difference
	SHRQ	CX, SI	// move a's bit to bottom
	ANDQ	$1, SI	// mask bit
	LEAQ	-1(SI*2), AX	// 1/0 => +1/-1
	RET

	// All the bytes in common are the same; compare lengths.
allsame:
	XORQ	AX, AX
	XORQ	CX, CX
	CMPQ	BX, DX
	SETGT	AX	// 1 if alen > blen
	SETEQ	CX	// 1 if alen == blen
	LEAQ	-1(CX)(AX*2), AX	// 1,0,-1 result
	RET

	// this works for >= 64 bytes of data.
#ifndef hasAVX2
big_loop:
	// Compare 64 bytes per iteration as four 16-byte SSE chunks.
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	PCMPEQB	X0, X1
	PMOVMSKB X1, AX
	XORQ	$0xffff, AX
	JNE	diff16

	MOVOU	16(SI), X0
	MOVOU	16(DI), X1
	PCMPEQB	X0, X1
	PMOVMSKB X1, AX
	XORQ	$0xffff, AX
	JNE	diff32

	MOVOU	32(SI), X0
	MOVOU	32(DI), X1
	PCMPEQB	X0, X1
	PMOVMSKB X1, AX
	XORQ	$0xffff, AX
	JNE	diff48

	MOVOU	48(SI), X0
	MOVOU	48(DI), X1
	PCMPEQB	X0, X1
	PMOVMSKB X1, AX
	XORQ	$0xffff, AX
	JNE	diff64

	ADDQ	$64, SI
	ADDQ	$64, DI
	SUBQ	$64, R8
	CMPQ	R8, $64
	JBE	loop
	JMP	big_loop
#endif

	// Compare 64-bytes per loop iteration.
	// Loop is unrolled and uses AVX2.
big_loop_avx2:
	VMOVDQU	(SI), Y2
	VMOVDQU	(DI), Y3
	VMOVDQU	32(SI), Y4
	VMOVDQU	32(DI), Y5
	VPCMPEQB Y2, Y3, Y0
	VPMOVMSKB Y0, AX
	XORL	$0xffffffff, AX
	JNE	diff32_avx2
	VPCMPEQB Y4, Y5, Y6
	VPMOVMSKB Y6, AX
	XORL	$0xffffffff, AX
	JNE	diff64_avx2

	ADDQ	$64, SI
	ADDQ	$64, DI
	SUBQ	$64, R8
	CMPQ	R8, $64
	JB	big_loop_avx2_exit
	JMP	big_loop_avx2

	// Avoid AVX->SSE transition penalty and search first 32 bytes of 64 byte chunk.
diff32_avx2:
	VZEROUPPER
	JMP diff16

	// Same as diff32_avx2, but for last 32 bytes.
diff64_avx2:
	VZEROUPPER
	JMP diff48

	// For <64 bytes remainder jump to normal loop.
big_loop_avx2_exit:
	VZEROUPPER
	JMP loop

View File

@@ -0,0 +1,86 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "go_asm.h"
#include "textflag.h"
TEXT ·Compare(SB),NOSPLIT|NOFRAME,$0-28
// Load both slice headers from the frame; the capacities at
// a_cap+8(FP) and b_cap+20(FP) are not needed for comparison.
MOVW a_base+0(FP), R2
MOVW a_len+4(FP), R0
MOVW b_base+12(FP), R3
MOVW b_len+16(FP), R1
ADD $28, R13, R7 // R7 = address of the result slot (ret+24(FP))
B cmpbody<>(SB)
TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-20
// Load both string headers from the frame.
MOVW a_base+0(FP), R2
MOVW a_len+4(FP), R0
MOVW b_base+8(FP), R3
MOVW b_len+12(FP), R1
ADD $20, R13, R7 // R7 = address of the result slot (ret+16(FP))
B cmpbody<>(SB)
// On entry:
// R0 is the length of a
// R1 is the length of b
// R2 points to the start of a
// R3 points to the start of b
// R7 points to return value (-1/0/1 will be written here)
//
// On exit:
// R4, R5, R6 and R8 are clobbered
TEXT cmpbody<>(SB),NOSPLIT|NOFRAME,$0-0
CMP R2, R3
BEQ samebytes // same backing pointers: only the lengths can differ
CMP R0, R1
MOVW R0, R6
MOVW.LT R1, R6 // R6 is min(R0, R1)
CMP $0, R6
BEQ samebytes // nothing to compare byte-wise
CMP $4, R6
ADD R2, R6 // R2 is current byte in a, R6 is the end of the range to compare
BLT byte_loop // length < 4
AND $3, R2, R8
CMP $0, R8
BNE byte_loop // unaligned a, use byte-wise compare (TODO: try to align a)
aligned_a:
AND $3, R3, R8
CMP $0, R8
BNE byte_loop // unaligned b, use byte-wise compare
AND $0xfffffffc, R6, R8 // R8 = end of the word-aligned portion of the range
// length >= 4
chunk4_loop:
MOVW.P 4(R2), R4 // load a word from each side, post-incrementing
MOVW.P 4(R3), R5
CMP R4, R5
BNE cmp // words differ: rewind and find the byte via byte_loop
CMP R2, R8
BNE chunk4_loop
CMP R2, R6
BEQ samebytes // all compared bytes were the same; compare lengths
byte_loop:
MOVBU.P 1(R2), R4
MOVBU.P 1(R3), R5
CMP R4, R5
BNE ret // first differing byte decides the result
CMP R2, R6
BNE byte_loop
samebytes:
// Common prefix is equal: derive -1/0/1 from the length comparison.
CMP R0, R1
MOVW.LT $1, R0
MOVW.GT $-1, R0
MOVW.EQ $0, R0
MOVW R0, (R7)
RET
ret:
// bytes differed; the flags from the CMP above select the sign
MOVW.LT $1, R0
MOVW.GT $-1, R0
MOVW R0, (R7)
RET
cmp:
// A 4-byte chunk differed: step both pointers back over it and
// let byte_loop locate the first differing byte.
SUB $4, R2, R2
SUB $4, R3, R3
B byte_loop

View File

@@ -0,0 +1,125 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "go_asm.h"
#include "textflag.h"
TEXT ·Compare<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-56
// R0 = a_base (want in R0)
// R1 = a_len (want in R1)
// R2 = a_cap (unused)
// R3 = b_base (want in R2)
// R4 = b_len (want in R3)
// R5 = b_cap (unused)
MOVD R3, R2
MOVD R4, R3
B cmpbody<>(SB)
TEXT runtime·cmpstring<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-40
// R0 = a_base
// R1 = a_len
// R2 = b_base
// R3 = b_len
B cmpbody<>(SB)
// On entry:
// R0 points to the start of a
// R1 is the length of a
// R2 points to the start of b
// R3 is the length of b
//
// On exit:
// R0 is the result
// R4, R5, R6, R8, R9 and R10 are clobbered
TEXT cmpbody<>(SB),NOSPLIT|NOFRAME,$0-0
CMP R0, R2
BEQ samebytes // same starting pointers; compare lengths
CMP R1, R3
CSEL LT, R3, R1, R6 // R6 is min(R1, R3)
CBZ R6, samebytes
BIC $0xf, R6, R10
CBZ R10, small // length < 16
ADD R0, R10 // end of chunk16
// length >= 16
chunk16_loop:
LDP.P 16(R0), (R4, R8)
LDP.P 16(R2), (R5, R9)
CMP R4, R5
BNE cmp
CMP R8, R9
BNE cmpnext
CMP R10, R0
BNE chunk16_loop
AND $0xf, R6, R6
CBZ R6, samebytes
SUBS $8, R6
BLT tail
// the length of tail > 8 bytes
MOVD.P 8(R0), R4
MOVD.P 8(R2), R5
CMP R4, R5
BNE cmp
SUB $8, R6
// compare last 8 bytes
tail:
MOVD (R0)(R6), R4
MOVD (R2)(R6), R5
CMP R4, R5
BEQ samebytes
cmp:
REV R4, R4
REV R5, R5
CMP R4, R5
ret:
MOVD $1, R0
CNEG HI, R0, R0
RET
small:
TBZ $3, R6, lt_8
MOVD (R0), R4
MOVD (R2), R5
CMP R4, R5
BNE cmp
SUBS $8, R6
BEQ samebytes
ADD $8, R0
ADD $8, R2
SUB $8, R6
B tail
lt_8:
TBZ $2, R6, lt_4
MOVWU (R0), R4
MOVWU (R2), R5
CMPW R4, R5
BNE cmp
SUBS $4, R6
BEQ samebytes
ADD $4, R0
ADD $4, R2
lt_4:
TBZ $1, R6, lt_2
MOVHU (R0), R4
MOVHU (R2), R5
CMPW R4, R5
BNE cmp
ADD $2, R0
ADD $2, R2
lt_2:
TBZ $0, R6, samebytes
one:
MOVBU (R0), R4
MOVBU (R2), R5
CMPW R4, R5
BNE ret
samebytes:
CMP R3, R1
CSET NE, R0
CNEG LO, R0, R0
RET
cmpnext:
REV R8, R4
REV R9, R5
CMP R4, R5
B ret

View File

@@ -0,0 +1,76 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build !386 && !amd64 && !s390x && !arm && !arm64 && !loong64 && !ppc64 && !ppc64le && !mips && !mipsle && !wasm && !mips64 && !mips64le && !riscv64
package bytealg
import _ "unsafe" // for go:linkname
// Compare returns an integer comparing two byte slices
// lexicographically: 0 if a == b, -1 if a < b, and +1 if a > b.
// A nil slice argument is equivalent to an empty slice.
func Compare(a, b []byte) int {
	n := min(len(a), len(b))
	// Skip the byte scan when there is nothing to scan or when both
	// slices share the same backing memory.
	if n > 0 && &a[0] != &b[0] {
		for i := 0; i < n; i++ {
			switch {
			case a[i] < b[i]:
				return -1
			case a[i] > b[i]:
				return +1
			}
		}
	}
	// Common prefix is equal: order by length.
	switch {
	case len(a) < len(b):
		return -1
	case len(a) > len(b):
		return +1
	}
	return 0
}
// CompareString returns an integer comparing two strings
// lexicographically: 0 if a == b, -1 if a < b, and +1 if a > b.
// It delegates to the runtime's cmpstring via the linkname below.
func CompareString(a, b string) int {
	return runtime_cmpstring(a, b)
}
// runtime.cmpstring calls are emitted by the compiler.
//
// runtime.cmpstring should be an internal detail,
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
// - gitee.com/zhaochuninhefei/gmgo
// - github.com/bytedance/gopkg
// - github.com/songzhibin97/gkit
//
// Do not remove or change the type signature.
// See go.dev/issue/67401.
//
//go:linkname runtime_cmpstring runtime.cmpstring
func runtime_cmpstring(a, b string) int {
	// Walk the common prefix; the loop condition doubles as the
	// min(len(a), len(b)) bound.
	for i := 0; i < len(a) && i < len(b); i++ {
		switch {
		case a[i] < b[i]:
			return -1
		case a[i] > b[i]:
			return +1
		}
	}
	// Prefix is equal: the shorter string orders first.
	switch {
	case len(a) < len(b):
		return -1
	case len(a) > len(b):
		return +1
	default:
		return 0
	}
}

View File

@@ -0,0 +1,88 @@
// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "go_asm.h"
#include "textflag.h"
TEXT ·Compare<ABIInternal>(SB),NOSPLIT,$0-56
// ABIInternal slice arguments; move b's base/len into the
// registers cmpbody expects.
// R4 = a_base
// R5 = a_len
// R6 = a_cap (unused)
// R7 = b_base (want in R6)
// R8 = b_len (want in R7)
// R9 = b_cap (unused)
MOVV R7, R6
MOVV R8, R7
JMP cmpbody<>(SB)
TEXT runtime·cmpstring<ABIInternal>(SB),NOSPLIT,$0-40
// String arguments already arrive in the registers cmpbody wants.
// R4 = a_base
// R5 = a_len
// R6 = b_base
// R7 = b_len
JMP cmpbody<>(SB)
// On entry:
// R5 length of a
// R7 length of b
// R4 points to the start of a
// R6 points to the start of b
//
// On exit:
// R4 holds the result (-1/0/1), returned via ABIInternal
TEXT cmpbody<>(SB),NOSPLIT|NOFRAME,$0
BEQ R4, R6, samebytes // same start of a and b
SGTU R5, R7, R9 // R9 = 1 if len(a) > len(b)
BNE R0, R9, r2_lt_r1
MOVV R5, R14
JMP entry
r2_lt_r1:
MOVV R7, R14 // R14 is min(R5, R7)
entry:
ADDV R4, R14, R12 // R4 start of a, R12 end of a
BEQ R4, R12, samebytes // length is 0
SRLV $4, R14 // R14 is number of 16-byte chunks
BEQ R0, R14, byte_loop
// make sure both a and b are aligned.
OR R4, R6, R15
AND $7, R15
BNE R0, R15, byte_loop // either side unaligned: byte-wise compare
PCALIGN $16
chunk16_loop:
BEQ R0, R14, byte_loop
MOVV (R4), R8
MOVV (R6), R9
BNE R8, R9, byte_loop // first doubleword differs: recompare byte-wise
MOVV 8(R4), R16
MOVV 8(R6), R17
ADDV $16, R4
ADDV $16, R6
SUBVU $1, R14
BEQ R16, R17, chunk16_loop
// second doubleword differed: back up to it, then byte-wise compare
SUBV $8, R4
SUBV $8, R6
byte_loop:
BEQ R4, R12, samebytes
MOVBU (R4), R8
ADDVU $1, R4
MOVBU (R6), R9
ADDVU $1, R6
BEQ R8, R9, byte_loop
byte_cmp:
SGTU R8, R9, R4 // R4 = 1 if (R8 > R9)
BNE R0, R4, ret
MOVV $-1, R4
JMP ret
samebytes:
// Common prefix equal: derive -1/0/1 from the length comparison.
SGTU R5, R7, R8
SGTU R7, R5, R9
SUBV R9, R8, R4
ret:
RET

View File

@@ -0,0 +1,88 @@
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build mips64 || mips64le
#include "go_asm.h"
#include "textflag.h"
TEXT ·Compare(SB),NOSPLIT,$0-56
MOVV a_base+0(FP), R3
MOVV b_base+24(FP), R4
MOVV a_len+8(FP), R1
MOVV b_len+32(FP), R2
MOVV $ret+48(FP), R9
JMP cmpbody<>(SB)
TEXT runtime·cmpstring(SB),NOSPLIT,$0-40
MOVV a_base+0(FP), R3
MOVV b_base+16(FP), R4
MOVV a_len+8(FP), R1
MOVV b_len+24(FP), R2
MOVV $ret+32(FP), R9
JMP cmpbody<>(SB)
// On entry:
// R1 length of a
// R2 length of b
// R3 points to the start of a
// R4 points to the start of b
// R9 points to the return value (-1/0/1)
TEXT cmpbody<>(SB),NOSPLIT|NOFRAME,$0
BEQ R3, R4, samebytes // same start of a and b
SGTU R1, R2, R7
BNE R0, R7, r2_lt_r1
MOVV R1, R10
JMP entry
r2_lt_r1:
MOVV R2, R10 // R10 is min(R1, R2)
entry:
ADDV R3, R10, R8 // R3 start of a, R8 end of a
BEQ R3, R8, samebytes // length is 0
SRLV $4, R10 // R10 is number of chunks
BEQ R0, R10, byte_loop
// make sure both a and b are aligned.
OR R3, R4, R11
AND $7, R11
BNE R0, R11, byte_loop
chunk16_loop:
BEQ R0, R10, byte_loop
MOVV (R3), R6
MOVV (R4), R7
BNE R6, R7, byte_loop
MOVV 8(R3), R13
MOVV 8(R4), R14
ADDV $16, R3
ADDV $16, R4
SUBVU $1, R10
BEQ R13, R14, chunk16_loop
SUBV $8, R3
SUBV $8, R4
byte_loop:
BEQ R3, R8, samebytes
MOVBU (R3), R6
ADDVU $1, R3
MOVBU (R4), R7
ADDVU $1, R4
BEQ R6, R7, byte_loop
byte_cmp:
SGTU R6, R7, R8 // R8 = 1 if (R6 > R7)
BNE R0, R8, ret
MOVV $-1, R8
JMP ret
samebytes:
SGTU R1, R2, R6
SGTU R2, R1, R7
SUBV R7, R6, R8
ret:
MOVV R8, (R9)
RET

View File

@@ -0,0 +1,72 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build mips || mipsle
#include "go_asm.h"
#include "textflag.h"
TEXT ·Compare(SB),NOSPLIT,$0-28
MOVW a_base+0(FP), R3
MOVW b_base+12(FP), R4
MOVW a_len+4(FP), R1
MOVW b_len+16(FP), R2
BEQ R3, R4, samebytes
SGTU R1, R2, R7
MOVW R1, R8
CMOVN R7, R2, R8 // R8 is min(R1, R2)
ADDU R3, R8 // R3 is current byte in a, R8 is last byte in a to compare
loop:
BEQ R3, R8, samebytes
MOVBU (R3), R6
ADDU $1, R3
MOVBU (R4), R7
ADDU $1, R4
BEQ R6, R7 , loop
SGTU R6, R7, R8
MOVW $-1, R6
CMOVZ R8, R6, R8
JMP cmp_ret
samebytes:
SGTU R1, R2, R6
SGTU R2, R1, R7
SUBU R7, R6, R8
cmp_ret:
MOVW R8, ret+24(FP)
RET
TEXT runtime·cmpstring(SB),NOSPLIT,$0-20
MOVW a_base+0(FP), R3
MOVW a_len+4(FP), R1
MOVW b_base+8(FP), R4
MOVW b_len+12(FP), R2
BEQ R3, R4, samebytes
SGTU R1, R2, R7
MOVW R1, R8
CMOVN R7, R2, R8 // R8 is min(R1, R2)
ADDU R3, R8 // R3 is current byte in a, R8 is last byte in a to compare
loop:
BEQ R3, R8, samebytes // all compared bytes were the same; compare lengths
MOVBU (R3), R6
ADDU $1, R3
MOVBU (R4), R7
ADDU $1, R4
BEQ R6, R7 , loop
// bytes differed
SGTU R6, R7, R8
MOVW $-1, R6
CMOVZ R8, R6, R8
JMP cmp_ret
samebytes:
SGTU R1, R2, R6
SGTU R2, R1, R7
SUBU R7, R6, R8
cmp_ret:
MOVW R8, ret+16(FP)
RET

View File

@@ -0,0 +1,23 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build 386 || amd64 || s390x || arm || arm64 || loong64 || ppc64 || ppc64le || mips || mipsle || wasm || mips64 || mips64le || riscv64
package bytealg
import _ "unsafe" // For go:linkname
// Compare is implemented in per-architecture assembly in this
// package; it returns -1, 0, or +1 ordering a lexicographically
// against b.
//
//go:noescape
func Compare(a, b []byte) int

// CompareString orders two strings the way Compare orders byte
// slices, delegating to the runtime's assembly cmpstring.
func CompareString(a, b string) int {
	return abigen_runtime_cmpstring(a, b)
}

// The declaration below generates ABI wrappers for functions
// implemented in assembly in this package but declared in another
// package.
//
//go:linkname abigen_runtime_cmpstring runtime.cmpstring
func abigen_runtime_cmpstring(a, b string) int

View File

@@ -0,0 +1,342 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build ppc64 || ppc64le
#include "go_asm.h"
#include "textflag.h"
// Helper names for x-form loads in BE ordering.
#ifdef GOARCH_ppc64le
#define _LDBEX MOVDBR
#define _LWBEX MOVWBR
#define _LHBEX MOVHBR
#else
#define _LDBEX MOVD
#define _LWBEX MOVW
#define _LHBEX MOVH
#endif
#ifdef GOPPC64_power9
#define SETB_CR0(rout) SETB CR0, rout
#define SETB_CR1(rout) SETB CR1, rout
#define SETB_INIT()
#define SETB_CR0_NE(rout) SETB_CR0(rout)
#else
// A helper macro to emulate SETB on P8. This assumes
// -1 is in R20, and 1 is in R21. crxlt and crxeq must
// also be the same CR field.
#define _SETB(crxlt, crxeq, rout) \
ISEL crxeq,R0,R21,rout \
ISEL crxlt,R20,rout,rout
// A special case when it is know the comparison
// will always be not equal. The result must be -1 or 1.
#define SETB_CR0_NE(rout) \
ISEL CR0LT,R20,R21,rout
#define SETB_CR0(rout) _SETB(CR0LT, CR0EQ, rout)
#define SETB_CR1(rout) _SETB(CR1LT, CR1EQ, rout)
#define SETB_INIT() \
MOVD $-1,R20 \
MOVD $1,R21
#endif
TEXT ·Compare<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-56
// incoming:
// R3 a addr
// R4 a len
// R6 b addr
// R7 b len
//
// on entry to cmpbody:
// R3 return value if len(a) == len(b)
// R5 a addr
// R6 b addr
// R9 min(len(a),len(b))
SETB_INIT()
MOVD R3,R5
CMP R4,R7,CR0
CMP R3,R6,CR7
ISEL CR0LT,R4,R7,R9
SETB_CR0(R3)
BC $12,30,LR // beqlr cr7
BR cmpbody<>(SB)
TEXT runtime·cmpstring<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-40
// incoming:
// R3 a addr -> R5
// R4 a len -> R3
// R5 b addr -> R6
// R6 b len -> R4
//
// on entry to cmpbody:
// R3 compare value if compared length is same.
// R5 a addr
// R6 b addr
// R9 min(len(a),len(b))
SETB_INIT()
CMP R4,R6,CR0
CMP R3,R5,CR7
ISEL CR0LT,R4,R6,R9
MOVD R5,R6
MOVD R3,R5
SETB_CR0(R3)
BC $12,30,LR // beqlr cr7
BR cmpbody<>(SB)
#ifdef GOARCH_ppc64le
DATA byteswap<>+0(SB)/8, $0x0706050403020100
DATA byteswap<>+8(SB)/8, $0x0f0e0d0c0b0a0908
GLOBL byteswap<>+0(SB), RODATA, $16
#define SWAP V21
#endif
TEXT cmpbody<>(SB),NOSPLIT|NOFRAME,$0-0
start:
CMP R9,$16,CR0
CMP R9,$32,CR1
CMP R9,$64,CR2
MOVD $16,R10
BLT cmp8
BLT CR1,cmp16
BLT CR2,cmp32
cmp64: // >= 64B
DCBT (R5) // optimize for size>=64
DCBT (R6) // cache hint
SRD $6,R9,R14 // There is at least one iteration.
MOVD R14,CTR
ANDCC $63,R9,R9
CMP R9,$16,CR1 // Do setup for tail check early on.
CMP R9,$32,CR2
CMP R9,$48,CR3
ADD $-16,R9,R9
MOVD $32,R11 // set offsets to load into vector
MOVD $48,R12 // set offsets to load into vector
PCALIGN $16
cmp64_loop:
LXVD2X (R5)(R0),V3 // load bytes of A at offset 0 into vector
LXVD2X (R6)(R0),V4 // load bytes of B at offset 0 into vector
VCMPEQUDCC V3,V4,V1
BGE CR6,different // jump out if its different
LXVD2X (R5)(R10),V3 // load bytes of A at offset 16 into vector
LXVD2X (R6)(R10),V4 // load bytes of B at offset 16 into vector
VCMPEQUDCC V3,V4,V1
BGE CR6,different
LXVD2X (R5)(R11),V3 // load bytes of A at offset 32 into vector
LXVD2X (R6)(R11),V4 // load bytes of B at offset 32 into vector
VCMPEQUDCC V3,V4,V1
BGE CR6,different
LXVD2X (R5)(R12),V3 // load bytes of A at offset 64 into vector
LXVD2X (R6)(R12),V4 // load bytes of B at offset 64 into vector
VCMPEQUDCC V3,V4,V1
BGE CR6,different
ADD $64,R5,R5 // increment to next 64 bytes of A
ADD $64,R6,R6 // increment to next 64 bytes of B
BDNZ cmp64_loop
BC $12,2,LR // beqlr
// Finish out tail with minimal overlapped checking.
// Note, 0 tail is handled by beqlr above.
BLE CR1,cmp64_tail_gt0
BLE CR2,cmp64_tail_gt16
BLE CR3,cmp64_tail_gt32
cmp64_tail_gt48: // 49 - 63 B
LXVD2X (R0)(R5),V3
LXVD2X (R0)(R6),V4
VCMPEQUDCC V3,V4,V1
BGE CR6,different
LXVD2X (R5)(R10),V3
LXVD2X (R6)(R10),V4
VCMPEQUDCC V3,V4,V1
BGE CR6,different
LXVD2X (R5)(R11),V3
LXVD2X (R6)(R11),V4
VCMPEQUDCC V3,V4,V1
BGE CR6,different
BR cmp64_tail_gt0
PCALIGN $16
cmp64_tail_gt32: // 33 - 48B
LXVD2X (R0)(R5),V3
LXVD2X (R0)(R6),V4
VCMPEQUDCC V3,V4,V1
BGE CR6,different
LXVD2X (R5)(R10),V3
LXVD2X (R6)(R10),V4
VCMPEQUDCC V3,V4,V1
BGE CR6,different
BR cmp64_tail_gt0
PCALIGN $16
cmp64_tail_gt16: // 17 - 32B
LXVD2X (R0)(R5),V3
LXVD2X (R0)(R6),V4
VCMPEQUDCC V3,V4,V1
BGE CR6,different
BR cmp64_tail_gt0
PCALIGN $16
cmp64_tail_gt0: // 1 - 16B
LXVD2X (R5)(R9),V3
LXVD2X (R6)(R9),V4
VCMPEQUDCC V3,V4,V1
BGE CR6,different
RET
PCALIGN $16
cmp32: // 32 - 63B
ANDCC $31,R9,R9
LXVD2X (R0)(R5),V3
LXVD2X (R0)(R6),V4
VCMPEQUDCC V3,V4,V1
BGE CR6,different
LXVD2X (R10)(R5),V3
LXVD2X (R10)(R6),V4
VCMPEQUDCC V3,V4,V1
BGE CR6,different
BC $12,2,LR // beqlr
ADD R9,R10,R10
LXVD2X (R9)(R5),V3
LXVD2X (R9)(R6),V4
VCMPEQUDCC V3,V4,V1
BGE CR6,different
LXVD2X (R10)(R5),V3
LXVD2X (R10)(R6),V4
VCMPEQUDCC V3,V4,V1
BGE CR6,different
RET
PCALIGN $16
cmp16: // 16 - 31B
ANDCC $15,R9,R9
LXVD2X (R0)(R5),V3
LXVD2X (R0)(R6),V4
VCMPEQUDCC V3,V4,V1
BGE CR6,different
BC $12,2,LR // beqlr
LXVD2X (R9)(R5),V3
LXVD2X (R9)(R6),V4
VCMPEQUDCC V3,V4,V1
BGE CR6,different
RET
PCALIGN $16
different:
#ifdef GOARCH_ppc64le
MOVD $byteswap<>+00(SB),R16
LXVD2X (R16)(R0),SWAP // Set up swap string
VPERM V3,V3,SWAP,V3
VPERM V4,V4,SWAP,V4
#endif
MFVSRD VS35,R16 // move upper doublewords of A and B into GPR for comparison
MFVSRD VS36,R10
CMPU R16,R10
BEQ lower
SETB_CR0_NE(R3)
RET
PCALIGN $16
lower:
VSLDOI $8,V3,V3,V3 // move lower doublewords of A and B into GPR for comparison
MFVSRD VS35,R16
VSLDOI $8,V4,V4,V4
MFVSRD VS36,R10
CMPU R16,R10
SETB_CR0_NE(R3)
RET
PCALIGN $16
cmp8: // 8 - 15B (0 - 15B if GOPPC64_power10)
#ifdef GOPPC64_power10
SLD $56,R9,R9
LXVLL R5,R9,V3 // Load bytes starting from MSB to LSB, unused are zero filled.
LXVLL R6,R9,V4
VCMPUQ V3,V4,CR0 // Compare as a 128b integer.
SETB_CR0(R6)
ISEL CR0EQ,R3,R6,R3 // If equal, length determines the return value.
RET
#else
CMP R9,$8
BLT cmp4
ANDCC $7,R9,R9
_LDBEX (R0)(R5),R10
_LDBEX (R0)(R6),R11
_LDBEX (R9)(R5),R12
_LDBEX (R9)(R6),R14
CMPU R10,R11,CR0
SETB_CR0(R5)
CMPU R12,R14,CR1
SETB_CR1(R6)
CRAND CR0EQ,CR1EQ,CR1EQ // If both equal, length determines return value.
ISEL CR0EQ,R6,R5,R4
ISEL CR1EQ,R3,R4,R3
RET
PCALIGN $16
cmp4: // 4 - 7B
CMP R9,$4
BLT cmp2
ANDCC $3,R9,R9
_LWBEX (R0)(R5),R10
_LWBEX (R0)(R6),R11
_LWBEX (R9)(R5),R12
_LWBEX (R9)(R6),R14
RLDIMI $32,R10,$0,R12
RLDIMI $32,R11,$0,R14
CMPU R12,R14
BR cmp0
PCALIGN $16
cmp2: // 2 - 3B
CMP R9,$2
BLT cmp1
ANDCC $1,R9,R9
_LHBEX (R0)(R5),R10
_LHBEX (R0)(R6),R11
_LHBEX (R9)(R5),R12
_LHBEX (R9)(R6),R14
RLDIMI $32,R10,$0,R12
RLDIMI $32,R11,$0,R14
CMPU R12,R14
BR cmp0
PCALIGN $16
cmp1:
CMP R9,$0
BEQ cmp0
MOVBZ (R5),R10
MOVBZ (R6),R11
CMPU R10,R11
cmp0:
SETB_CR0(R6)
ISEL CR0EQ,R3,R6,R3
RET
#endif

View File

@@ -0,0 +1,222 @@
// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "go_asm.h"
#include "textflag.h"
TEXT ·Compare<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-56
// X10 = a_base
// X11 = a_len
// X12 = a_cap (unused)
// X13 = b_base (want in X12)
// X14 = b_len (want in X13)
// X15 = b_cap (unused)
MOV X13, X12
MOV X14, X13
JMP compare<>(SB)
TEXT runtime·cmpstring<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-40
// X10 = a_base
// X11 = a_len
// X12 = b_base
// X13 = b_len
JMP compare<>(SB)
// On entry:
// X10 points to start of a
// X11 length of a
// X12 points to start of b
// X13 length of b
// for non-regabi X14 points to the address to store the return value (-1/0/1)
// for regabi the return value in X10
TEXT compare<>(SB),NOSPLIT|NOFRAME,$0
BEQ X10, X12, cmp_len
MOV X11, X5
BGE X13, X5, use_a_len // X5 = min(len(a), len(b))
MOV X13, X5
use_a_len:
BEQZ X5, cmp_len
MOV $32, X6
BLT X5, X6, check8_unaligned
// Check alignment - if alignment differs we have to do one byte at a time.
AND $7, X10, X7
AND $7, X12, X8
BNE X7, X8, check8_unaligned
BEQZ X7, compare32
// Check one byte at a time until we reach 8 byte alignment.
SUB X7, X0, X7
ADD $8, X7, X7
SUB X7, X5, X5
align:
SUB $1, X7
MOVBU 0(X10), X8
MOVBU 0(X12), X9
BNE X8, X9, cmp
ADD $1, X10
ADD $1, X12
BNEZ X7, align
check32:
// X6 contains $32
BLT X5, X6, compare16
compare32:
MOV 0(X10), X15
MOV 0(X12), X16
MOV 8(X10), X17
MOV 8(X12), X18
BNE X15, X16, cmp8a
BNE X17, X18, cmp8b
MOV 16(X10), X15
MOV 16(X12), X16
MOV 24(X10), X17
MOV 24(X12), X18
BNE X15, X16, cmp8a
BNE X17, X18, cmp8b
ADD $32, X10
ADD $32, X12
SUB $32, X5
BGE X5, X6, compare32
BEQZ X5, cmp_len
check16:
MOV $16, X6
BLT X5, X6, check8_unaligned
compare16:
MOV 0(X10), X15
MOV 0(X12), X16
MOV 8(X10), X17
MOV 8(X12), X18
BNE X15, X16, cmp8a
BNE X17, X18, cmp8b
ADD $16, X10
ADD $16, X12
SUB $16, X5
BEQZ X5, cmp_len
check8_unaligned:
MOV $8, X6
BLT X5, X6, check4_unaligned
compare8_unaligned:
MOVBU 0(X10), X8
MOVBU 1(X10), X15
MOVBU 2(X10), X17
MOVBU 3(X10), X19
MOVBU 4(X10), X21
MOVBU 5(X10), X23
MOVBU 6(X10), X25
MOVBU 7(X10), X29
MOVBU 0(X12), X9
MOVBU 1(X12), X16
MOVBU 2(X12), X18
MOVBU 3(X12), X20
MOVBU 4(X12), X22
MOVBU 5(X12), X24
MOVBU 6(X12), X28
MOVBU 7(X12), X30
BNE X8, X9, cmp1a
BNE X15, X16, cmp1b
BNE X17, X18, cmp1c
BNE X19, X20, cmp1d
BNE X21, X22, cmp1e
BNE X23, X24, cmp1f
BNE X25, X28, cmp1g
BNE X29, X30, cmp1h
ADD $8, X10
ADD $8, X12
SUB $8, X5
BGE X5, X6, compare8_unaligned
BEQZ X5, cmp_len
check4_unaligned:
MOV $4, X6
BLT X5, X6, compare1
compare4_unaligned:
MOVBU 0(X10), X8
MOVBU 1(X10), X15
MOVBU 2(X10), X17
MOVBU 3(X10), X19
MOVBU 0(X12), X9
MOVBU 1(X12), X16
MOVBU 2(X12), X18
MOVBU 3(X12), X20
BNE X8, X9, cmp1a
BNE X15, X16, cmp1b
BNE X17, X18, cmp1c
BNE X19, X20, cmp1d
ADD $4, X10
ADD $4, X12
SUB $4, X5
BGE X5, X6, compare4_unaligned
compare1:
BEQZ X5, cmp_len
MOVBU 0(X10), X8
MOVBU 0(X12), X9
BNE X8, X9, cmp
ADD $1, X10
ADD $1, X12
SUB $1, X5
JMP compare1
// Compare 8 bytes of memory in X15/X16 that are known to differ.
cmp8a:
MOV X15, X17
MOV X16, X18
// Compare 8 bytes of memory in X17/X18 that are known to differ.
cmp8b:
MOV $0xff, X19
cmp8_loop:
AND X17, X19, X8
AND X18, X19, X9
BNE X8, X9, cmp
SLLI $8, X19
JMP cmp8_loop
cmp1a:
SLTU X9, X8, X5
SLTU X8, X9, X6
JMP cmp_ret
cmp1b:
SLTU X16, X15, X5
SLTU X15, X16, X6
JMP cmp_ret
cmp1c:
SLTU X18, X17, X5
SLTU X17, X18, X6
JMP cmp_ret
cmp1d:
SLTU X20, X19, X5
SLTU X19, X20, X6
JMP cmp_ret
cmp1e:
SLTU X22, X21, X5
SLTU X21, X22, X6
JMP cmp_ret
cmp1f:
SLTU X24, X23, X5
SLTU X23, X24, X6
JMP cmp_ret
cmp1g:
SLTU X28, X25, X5
SLTU X25, X28, X6
JMP cmp_ret
cmp1h:
SLTU X30, X29, X5
SLTU X29, X30, X6
JMP cmp_ret
cmp_len:
MOV X11, X8
MOV X13, X9
cmp:
SLTU X9, X8, X5
SLTU X8, X9, X6
cmp_ret:
SUB X5, X6, X10
RET

View File

@@ -0,0 +1,69 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "go_asm.h"
#include "textflag.h"
TEXT ·Compare(SB),NOSPLIT|NOFRAME,$0-56
// Load both slice headers; the capacities are not needed.
MOVD a_base+0(FP), R3
MOVD a_len+8(FP), R4
MOVD b_base+24(FP), R5
MOVD b_len+32(FP), R6
LA ret+48(FP), R7 // R7 = address of the result word
BR cmpbody<>(SB)
TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-40
// Load both string headers.
MOVD a_base+0(FP), R3
MOVD a_len+8(FP), R4
MOVD b_base+16(FP), R5
MOVD b_len+24(FP), R6
LA ret+32(FP), R7 // R7 = address of the result word
BR cmpbody<>(SB)
// input:
// R3 = a
// R4 = alen
// R5 = b
// R6 = blen
// R7 = address of output word (stores -1/0/1 here)
TEXT cmpbody<>(SB),NOSPLIT|NOFRAME,$0-0
CMPBEQ R3, R5, cmplengths // same pointers: only the lengths can differ
MOVD R4, R8
CMPBLE R4, R6, amin
MOVD R6, R8
amin:
// R8 = min(alen, blen)
CMPBEQ R8, $0, cmplengths
CMP R8, $256
BLE tail
loop:
// CLC compares 256 bytes per iteration, setting the condition code.
CLC $256, 0(R3), 0(R5)
BGT gt
BLT lt
SUB $256, R8
MOVD $256(R3), R3
MOVD $256(R5), R5
CMP R8, $256
BGT loop
tail:
// Execute the CLC at cmpbodyclc<> with its length field taken from
// R8; the instruction's length encoding is one less than the byte
// count, hence the SUB $1.
SUB $1, R8
EXRL $cmpbodyclc<>(SB), R8
BGT gt
BLT lt
cmplengths:
// Common prefix equal: order by length.
CMP R4, R6
BEQ eq
BLT lt
gt:
MOVD $1, 0(R7)
RET
lt:
MOVD $-1, 0(R7)
RET
eq:
MOVD $0, 0(R7)
RET
// Target of the EXRL in cmpbody's tail: the $1 length placeholder is
// replaced at execution time by the low byte of the EXRL register.
TEXT cmpbodyclc<>(SB),NOSPLIT|NOFRAME,$0-0
CLC $1, 0(R3), 0(R5)
RET

View File

@@ -0,0 +1,115 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "go_asm.h"
#include "textflag.h"
TEXT ·Compare(SB), NOSPLIT, $0-56
Get SP
I64Load a_base+0(FP)
I64Load a_len+8(FP)
I64Load b_base+24(FP)
I64Load b_len+32(FP)
Call cmpbody<>(SB)
I64Store ret+48(FP)
RET
TEXT runtime·cmpstring(SB), NOSPLIT, $0-40
Get SP
I64Load a_base+0(FP)
I64Load a_len+8(FP)
I64Load b_base+16(FP)
I64Load b_len+24(FP)
Call cmpbody<>(SB)
I64Store ret+32(FP)
RET
// params: a, alen, b, blen
// ret: -1/0/1
TEXT cmpbody<>(SB), NOSPLIT, $0-0
// len = min(alen, blen)
Get R1
Get R3
Get R1
Get R3
I64LtU
Select
Set R4
Get R0
I32WrapI64
Get R2
I32WrapI64
Get R4
I32WrapI64
Call memcmp<>(SB)
I64ExtendI32S
Tee R5
I64Eqz
If
// check length
Get R1
Get R3
I64Sub
Set R5
End
I64Const $0
I64Const $-1
I64Const $1
Get R5
I64Const $0
I64LtS
Select
Get R5
I64Eqz
Select
Return
// compiled with emscripten
// params: a, b, len
// ret: <0/0/>0
TEXT memcmp<>(SB), NOSPLIT, $0-0
Get R2
If $1
Loop
Get R0
I32Load8S $0
Tee R3
Get R1
I32Load8S $0
Tee R4
I32Eq
If
Get R0
I32Const $1
I32Add
Set R0
Get R1
I32Const $1
I32Add
Set R1
I32Const $0
Get R2
I32Const $-1
I32Add
Tee R2
I32Eqz
BrIf $3
Drop
Br $1
End
End
Get R3
I32Const $255
I32And
Get R4
I32Const $255
I32And
I32Sub
Else
I32Const $0
End
Return

View File

@@ -0,0 +1,229 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "go_asm.h"
#include "asm_amd64.h"
#include "textflag.h"
TEXT ·Count(SB),NOSPLIT,$0-40
#ifndef hasPOPCNT
CMPB internalcpu·X86+const_offsetX86HasPOPCNT(SB), $1
JEQ 2(PC)
JMP ·countGeneric(SB)
#endif
MOVQ b_base+0(FP), SI
MOVQ b_len+8(FP), BX
MOVB c+24(FP), AL
LEAQ ret+32(FP), R8
JMP countbody<>(SB)
TEXT ·CountString(SB),NOSPLIT,$0-32
#ifndef hasPOPCNT
CMPB internalcpu·X86+const_offsetX86HasPOPCNT(SB), $1
JEQ 2(PC)
JMP ·countGenericString(SB)
#endif
MOVQ s_base+0(FP), SI
MOVQ s_len+8(FP), BX
MOVB c+16(FP), AL
LEAQ ret+24(FP), R8
JMP countbody<>(SB)
// input:
// SI: data
// BX: data len
// AL: byte sought
// R8: address to put result
// This function requires the POPCNT instruction.
TEXT countbody<>(SB),NOSPLIT,$0
// Shuffle X0 around so that each byte contains
// the character we're looking for.
MOVD AX, X0
PUNPCKLBW X0, X0
PUNPCKLBW X0, X0
PSHUFL $0, X0, X0
CMPQ BX, $16
JLT small
MOVQ $0, R12 // Accumulator
MOVQ SI, DI
CMPQ BX, $64
JAE avx2
sse:
LEAQ -16(SI)(BX*1), AX // AX = address of last 16 bytes
JMP sseloopentry
PCALIGN $16
sseloop:
// Move the next 16-byte chunk of the data into X1.
MOVOU (DI), X1
// Compare bytes in X0 to X1.
PCMPEQB X0, X1
// Take the top bit of each byte in X1 and put the result in DX.
PMOVMSKB X1, DX
// Count number of matching bytes
POPCNTL DX, DX
// Accumulate into R12
ADDQ DX, R12
// Advance to next block.
ADDQ $16, DI
sseloopentry:
CMPQ DI, AX
JBE sseloop
// Get the number of bytes to consider in the last 16 bytes
ANDQ $15, BX
JZ end
// Create mask to ignore overlap between previous 16 byte block
// and the next.
MOVQ $16,CX
SUBQ BX, CX
MOVQ $0xFFFF, R10
SARQ CL, R10
SALQ CL, R10
// Process the last 16-byte chunk. This chunk may overlap with the
// chunks we've already searched so we need to mask part of it.
MOVOU (AX), X1
PCMPEQB X0, X1
PMOVMSKB X1, DX
// Apply mask
ANDQ R10, DX
POPCNTL DX, DX
ADDQ DX, R12
end:
MOVQ R12, (R8)
RET
// handle for lengths < 16
small:
TESTQ BX, BX
JEQ endzero
// Check if we'll load across a page boundary.
LEAQ 16(SI), AX
TESTW $0xff0, AX
JEQ endofpage
// We must ignore high bytes as they aren't part of our slice.
// Create mask.
MOVB BX, CX
MOVQ $1, R10
SALQ CL, R10
SUBQ $1, R10
// Load data
MOVOU (SI), X1
// Compare target byte with each byte in data.
PCMPEQB X0, X1
// Move result bits to integer register.
PMOVMSKB X1, DX
// Apply mask
ANDQ R10, DX
POPCNTL DX, DX
// Directly return DX, we don't need to accumulate
// since we have <16 bytes.
MOVQ DX, (R8)
RET
endzero:
MOVQ $0, (R8)
RET
endofpage:
// We must ignore low bytes as they aren't part of our slice.
MOVQ $16,CX
SUBQ BX, CX
MOVQ $0xFFFF, R10
SARQ CL, R10
SALQ CL, R10
// Load data into the high end of X1.
MOVOU -16(SI)(BX*1), X1
// Compare target byte with each byte in data.
PCMPEQB X0, X1
// Move result bits to integer register.
PMOVMSKB X1, DX
// Apply mask
ANDQ R10, DX
// Directly return DX, we don't need to accumulate
// since we have <16 bytes.
POPCNTL DX, DX
MOVQ DX, (R8)
RET
avx2:
#ifndef hasAVX2
CMPB internalcpu·X86+const_offsetX86HasAVX2(SB), $1
JNE sse
#endif
MOVD AX, X0
LEAQ -64(SI)(BX*1), R11
LEAQ (SI)(BX*1), R13
VPBROADCASTB X0, Y1
PCALIGN $32
avx2_loop:
VMOVDQU (DI), Y2
VMOVDQU 32(DI), Y4
VPCMPEQB Y1, Y2, Y3
VPCMPEQB Y1, Y4, Y5
VPMOVMSKB Y3, DX
VPMOVMSKB Y5, CX
POPCNTL DX, DX
POPCNTL CX, CX
ADDQ DX, R12
ADDQ CX, R12
ADDQ $64, DI
CMPQ DI, R11
JLE avx2_loop
// If last block is already processed,
// skip to the end.
//
// This check is NOT an optimization; if the input length is a
// multiple of 64, we must not go through the last leg of the
// function because the bit shift count passed to SALQ below would
// be 64, which is outside of the 0-63 range supported by those
// instructions.
//
// Tests in the bytes and strings packages with input lengths that
// are multiples of 64 will break if this condition were removed.
CMPQ DI, R13
JEQ endavx
// Load address of the last 64 bytes.
// There is an overlap with the previous block.
MOVQ R11, DI
VMOVDQU (DI), Y2
VMOVDQU 32(DI), Y4
VPCMPEQB Y1, Y2, Y3
VPCMPEQB Y1, Y4, Y5
VPMOVMSKB Y3, DX
VPMOVMSKB Y5, CX
// Exit AVX mode.
VZEROUPPER
SALQ $32, CX
ORQ CX, DX
// Create mask to ignore overlap between previous 64 byte block
// and the next.
ANDQ $63, BX
MOVQ $64, CX
SUBQ BX, CX
MOVQ $0xFFFFFFFFFFFFFFFF, R10
SALQ CL, R10
// Apply mask
ANDQ R10, DX
POPCNTQ DX, DX
ADDQ DX, R12
MOVQ R12, (R8)
RET
endavx:
// Exit AVX mode.
VZEROUPPER
MOVQ R12, (R8)
RET

View File

@@ -0,0 +1,43 @@
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "go_asm.h"
#include "textflag.h"
TEXT ·Count(SB),NOSPLIT,$0-20
// Load the slice header and the byte to count.
MOVW b_base+0(FP), R0
MOVW b_len+4(FP), R1
MOVBU c+12(FP), R2
MOVW $ret+16(FP), R7 // R7 = address of the int result
B countbytebody<>(SB)
TEXT ·CountString(SB),NOSPLIT,$0-16
// Load the string header and the byte to count.
MOVW s_base+0(FP), R0
MOVW s_len+4(FP), R1
MOVBU c+8(FP), R2
MOVW $ret+12(FP), R7 // R7 = address of the int result
B countbytebody<>(SB)
// Input:
// R0: data
// R1: data length
// R2: byte to find
// R7: address to put result
//
// On exit:
// R4 and R8 are clobbered
TEXT countbytebody<>(SB),NOSPLIT,$0
MOVW $0, R8 // R8 = count of byte to search
CMP $0, R1
B.EQ done // short path to handle 0-byte case
ADD R0, R1 // R1 is the end of the range
byte_loop:
MOVBU.P 1(R0), R4 // load next byte, post-incrementing R0
CMP R4, R2
ADD.EQ $1, R8 // bump the count only on a match
CMP R0, R1
B.NE byte_loop
done:
MOVW R8, (R7)
RET

View File

@@ -0,0 +1,92 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "go_asm.h"
#include "textflag.h"
// Count counts the occurrences of byte c in b.
// func Count(b []byte, c byte) int
TEXT ·Count(SB),NOSPLIT,$0-40
	MOVD	b_base+0(FP), R0
	MOVD	b_len+8(FP), R2
	MOVBU	c+24(FP), R1
	MOVD	$ret+32(FP), R8
	B	countbytebody<>(SB)

// CountString counts the occurrences of byte c in s.
// func CountString(s string, c byte) int
TEXT ·CountString(SB),NOSPLIT,$0-32
	MOVD	s_base+0(FP), R0
	MOVD	s_len+8(FP), R2
	MOVBU	c+16(FP), R1
	MOVD	$ret+24(FP), R8
	B	countbytebody<>(SB)

// countbytebody counts bytes using SIMD on 32-byte aligned chunks,
// with scalar loops for the unaligned head and the short tail.
//
// input:
// R0: data
// R2: data len
// R1: byte to find
// R8: address to put result
TEXT countbytebody<>(SB),NOSPLIT,$0
	// R11 = count of byte to search
	MOVD	$0, R11
	// short path to handle 0-byte case
	CBZ	R2, done
	CMP	$0x20, R2
	// jump directly to tail if length < 32
	BLO	tail
	ANDS	$0x1f, R0, R9
	BEQ	chunk
	// Work with not 32-byte aligned head
	BIC	$0x1f, R0, R3
	ADD	$0x20, R3	// R3 = next 32-byte aligned address
	PCALIGN	$16
head_loop:
	MOVBU.P	1(R0), R5
	CMP	R5, R1
	CINC	EQ, R11, R11	// increment R11 on a match
	SUB	$1, R2, R2
	CMP	R0, R3
	BNE	head_loop
	// Work with 32-byte aligned chunks
chunk:
	BIC	$0x1f, R2, R9
	// The first chunk can also be the last
	CBZ	R9, tail
	// R3 = end of 32-byte chunks
	ADD	R0, R9, R3
	MOVD	$1, R5
	VMOV	R5, V5.B16
	// R2 = length of tail
	SUB	R9, R2, R2
	// Duplicate R1 (byte to search) to 16 1-byte elements of V0
	VMOV	R1, V0.B16
	// Clear the low 64-bit element of V7 and V8
	VEOR	V7.B8, V7.B8, V7.B8
	VEOR	V8.B8, V8.B8, V8.B8
	PCALIGN	$16
	// Count the target byte in 32-byte chunk
chunk_loop:
	VLD1.P	(R0), [V1.B16, V2.B16]
	CMP	R0, R3
	VCMEQ	V0.B16, V1.B16, V3.B16
	VCMEQ	V0.B16, V2.B16, V4.B16
	// Clear the higher 7 bits
	VAND	V5.B16, V3.B16, V3.B16
	VAND	V5.B16, V4.B16, V4.B16
	// Count lanes match the requested byte
	VADDP	V4.B16, V3.B16, V6.B16 // 32B->16B
	VUADDLV	V6.B16, V7
	// Accumulate the count in low 64-bit element of V8 when inside the loop
	VADD	V7, V8
	BNE	chunk_loop
	VMOV	V8.D[0], R6
	ADD	R6, R11, R11
	CBZ	R2, done
tail:
	// Work with tail shorter than 32 bytes
	MOVBU.P	1(R0), R5
	SUB	$1, R2, R2
	CMP	R5, R1
	CINC	EQ, R11, R11
	CBNZ	R2, tail
done:
	MOVD	R11, (R8)	// store the count through the result pointer
	RET

View File

@@ -0,0 +1,27 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build !amd64 && !arm && !arm64 && !ppc64le && !ppc64 && !riscv64 && !s390x
package bytealg
// Count returns the number of bytes in b equal to c.
func Count(b []byte, c byte) int {
	total := 0
	for i := 0; i < len(b); i++ {
		if b[i] == c {
			total++
		}
	}
	return total
}
// CountString returns the number of bytes in s equal to c.
func CountString(s string, c byte) int {
	total := 0
	// Range over the bytes of s; the compiler recognizes this
	// pattern and does not copy the string.
	for _, x := range []byte(s) {
		if x == c {
			total++
		}
	}
	return total
}

View File

@@ -0,0 +1,33 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build amd64 || arm || arm64 || ppc64le || ppc64 || riscv64 || s390x
package bytealg
// Count returns the number of bytes in b equal to c.
// It is implemented in assembly on this platform.
//
//go:noescape
func Count(b []byte, c byte) int

// CountString returns the number of bytes in s equal to c.
// It is implemented in assembly on this platform.
//
//go:noescape
func CountString(s string, c byte) int
// countGeneric is a pure-Go fallback used by the assembly
// implementations; it returns the number of bytes in b equal to c.
func countGeneric(b []byte, c byte) int {
	n := 0
	for i := range b {
		if b[i] == c {
			n++
		}
	}
	return n
}
// countGenericString is a pure-Go fallback used by the assembly
// implementations; it returns the number of bytes in s equal to c.
func countGenericString(s string, c byte) int {
	n := 0
	// Byte-wise iteration; the []byte conversion in a range clause
	// is recognized by the compiler and does not allocate.
	for _, x := range []byte(s) {
		if x == c {
			n++
		}
	}
	return n
}

View File

@@ -0,0 +1,154 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build ppc64le || ppc64
#include "go_asm.h"
#include "textflag.h"
// Count counts the occurrences of byte c in b.
// func Count(b []byte, c byte) int
TEXT ·Count<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-40
	// R3 = byte array pointer
	// R4 = length
	// R6 = byte to count
	MTVRD	R6, V1		// move compare byte
	MOVD	R6, R5
	VSPLTB	$7, V1, V1	// replicate byte across V1
	BR	countbytebody<>(SB)

// CountString counts the occurrences of byte c in s.
// func CountString(s string, c byte) int
TEXT ·CountString<ABIInternal>(SB), NOSPLIT|NOFRAME, $0-32
	// R3 = byte array pointer
	// R4 = length
	// R5 = byte to count
	MTVRD	R5, V1		// move compare byte
	VSPLTB	$7, V1, V1	// replicate byte across V1
	BR	countbytebody<>(SB)

// countbytebody counts 32 bytes per iteration with VSX vectors and
// accumulates match bits (8 bits per match, fixed up at the end).
//
// R3: addr of string
// R4: len of string
// R5: byte to count
// V1: byte to count, splatted.
// On exit:
// R3: return value
TEXT countbytebody<>(SB), NOSPLIT|NOFRAME, $0-0
	MOVD	$0, R18 // byte count
#ifndef GOPPC64_power10
	RLDIMI	$8, R5, $48, R5
	RLDIMI	$16, R5, $32, R5
	RLDIMI	$32, R5, $0, R5	// fill reg with the byte to count
#endif
	CMPU	R4, $32		// Check if it's a small string (<32 bytes)
	BLT	tail		// Jump to the small string case
	SRD	$5, R4, R20	// R20 = number of 32-byte chunks
	MOVD	R20, CTR
	MOVD	$16, R21
	XXLXOR	V4, V4, V4
	XXLXOR	V5, V5, V5
	PCALIGN	$16
cmploop:
	LXVD2X	(R0)(R3), V0	// Count 32B per loop with two vector accumulators.
	LXVD2X	(R21)(R3), V2
	VCMPEQUB	V2, V1, V2
	VCMPEQUB	V0, V1, V0
	VPOPCNTD	V2, V2	// A match is 0xFF or 0. Count the bits into doubleword buckets.
	VPOPCNTD	V0, V0
	VADDUDM	V0, V4, V4	// Accumulate the popcounts. They are 8x the count.
	VADDUDM	V2, V5, V5	// The count will be fixed up afterwards.
	ADD	$32, R3
	BDNZ	cmploop
	VADDUDM	V4, V5, V5
	MFVSRD	V5, R18
	VSLDOI	$8, V5, V5, V5
	MFVSRD	V5, R21
	ADD	R21, R18, R18
	ANDCC	$31, R4, R4	// R4 = leftover tail length (0-31)
	// Skip the tail processing if no bytes remaining.
	BEQ	tail_0
#ifdef GOPPC64_power10
	SRD	$3, R18, R18	// Fix the vector loop count before counting the tail on P10.
tail:	// Count the last 0 - 31 bytes.
	CMP	R4, $16
	BLE	small_tail_p10
	LXV	0(R3), V0
	VCMPEQUB	V0, V1, V0
	VCNTMBB	V0, $1, R14	// Sum the value of bit 0 of each byte of the compare into R14.
	SRD	$56, R14, R14	// The result of VCNTMBB is shifted. Unshift it.
	ADD	R14, R18, R18
	ADD	$16, R3, R3
	ANDCC	$15, R4, R4
small_tail_p10:
	SLD	$56, R4, R6
	LXVLL	R3, R6, V0	// load-with-length: reads exactly R4 bytes
	VCMPEQUB	V0, V1, V0
	VCLRRB	V0, R4, V0	// If <16B being compared, clear matches of the 16-R4 bytes.
	VCNTMBB	V0, $1, R14	// Sum the value of bit 0 of each byte of the compare into R14.
	SRD	$56, R14, R14	// The result of VCNTMBB is shifted. Unshift it.
	ADD	R14, R18, R3
	RET
#else
tail:	// Count the last 0 - 31 bytes.
	CMP	R4, $16
	BLT	tail_8
	MOVD	(R3), R12
	MOVD	8(R3), R14
	CMPB	R12, R5, R12	// CMPB sets matching bytes to 0xFF
	CMPB	R14, R5, R14
	POPCNTD	R12, R12	// 8 bits set per matching byte
	POPCNTD	R14, R14
	ADD	R12, R18, R18
	ADD	R14, R18, R18
	ADD	$16, R3, R3
	ADD	$-16, R4, R4
tail_8:	// Count the remaining 0 - 15 bytes.
	CMP	R4, $8
	BLT	tail_4
	MOVD	(R3), R12
	CMPB	R12, R5, R12
	POPCNTD	R12, R12
	ADD	R12, R18, R18
	ADD	$8, R3, R3
	ADD	$-8, R4, R4
tail_4:	// Count the remaining 0 - 7 bytes.
	CMP	R4, $4
	BLT	tail_2
	MOVWZ	(R3), R12
	CMPB	R12, R5, R12
	SLD	$32, R12, R12	// Remove non-participating matches.
	POPCNTD	R12, R12
	ADD	R12, R18, R18
	ADD	$4, R3, R3
	ADD	$-4, R4, R4
tail_2:	// Count the remaining 0 - 3 bytes.
	CMP	R4, $2
	BLT	tail_1
	MOVHZ	(R3), R12
	CMPB	R12, R5, R12
	SLD	$48, R12, R12	// Remove non-participating matches.
	POPCNTD	R12, R12
	ADD	R12, R18, R18
	ADD	$2, R3, R3
	ADD	$-2, R4, R4
tail_1:	// Count the remaining 0 - 1 bytes.
	CMP	R4, $1
	BLT	tail_0
	MOVBZ	(R3), R12
	CMPB	R12, R5, R12
	ANDCC	$0x8, R12, R12	// keep a single bit of the match (worth 8 after fixup)
	ADD	R12, R18, R18
#endif
tail_0:	// No remaining tail to count.
	SRD	$3, R18, R3	// Fixup count, it is off by 8x.
	RET

View File

@@ -0,0 +1,49 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "go_asm.h"
#include "textflag.h"
// Count counts the occurrences of byte c in b
// using a simple byte-at-a-time loop.
// func Count(b []byte, c byte) int
TEXT ·Count<ABIInternal>(SB),NOSPLIT,$0-40
	// X10 = b_base
	// X11 = b_len
	// X12 = b_cap (unused)
	// X13 = byte to count (want in X12)
	AND	$0xff, X13, X12
	MOV	ZERO, X14	// count
	ADD	X10, X11	// end
	PCALIGN	$16
loop:
	BEQ	X10, X11, done
	MOVBU	(X10), X15
	ADD	$1, X10
	BNE	X12, X15, loop
	ADD	$1, X14
	JMP	loop
done:
	MOV	X14, X10	// result returned in X10
	RET

// CountString counts the occurrences of byte c in s.
// func CountString(s string, c byte) int
TEXT ·CountString<ABIInternal>(SB),NOSPLIT,$0-32
	// X10 = s_base
	// X11 = s_len
	// X12 = byte to count
	AND	$0xff, X12
	MOV	ZERO, X14	// count
	ADD	X10, X11	// end
	PCALIGN	$16
loop:
	BEQ	X10, X11, done
	MOVBU	(X10), X15
	ADD	$1, X10
	BNE	X12, X15, loop
	ADD	$1, X14
	JMP	loop
done:
	MOV	X14, X10	// result returned in X10
	RET

View File

@@ -0,0 +1,169 @@
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "go_asm.h"
#include "textflag.h"
// condition code masks
// condition code masks
#define EQ 8
#define NE 7

// register assignments
#define R_ZERO R0
#define R_VAL R1
#define R_TMP R2
#define R_PTR R3
#define R_LEN R4
#define R_CHAR R5
#define R_RET R6
#define R_ITER R7
#define R_CNT R8
#define R_MPTR R9

// vector register assignments
#define V_ZERO V0
#define V_CHAR V1
#define V_MASK V2
#define V_VAL V3
#define V_CNT V4

// mask for trailing bytes in vector implementation
GLOBL countbytemask<>(SB), RODATA, $16
DATA countbytemask<>+0(SB)/8, $0x0101010101010101
DATA countbytemask<>+8(SB)/8, $0x0101010101010101

// func Count(b []byte, c byte) int
TEXT ·Count(SB), NOSPLIT|NOFRAME, $0-40
	LMG	b+0(FP), R_PTR, R_LEN
	MOVBZ	c+24(FP), R_CHAR
	MOVD	$ret+32(FP), R_RET
	BR	countbytebody<>(SB)

// func CountString(s string, c byte) int
TEXT ·CountString(SB), NOSPLIT|NOFRAME, $0-32
	LMG	s+0(FP), R_PTR, R_LEN
	MOVBZ	c+16(FP), R_CHAR
	MOVD	$ret+24(FP), R_RET
	BR	countbytebody<>(SB)

// countbytebody counts 16 bytes per iteration when the vector
// facility is available, else falls back to a scalar loop.
//
// input:
// R_PTR = address of array of bytes
// R_LEN = number of bytes in array
// R_CHAR = byte value to count zero (extended to register width)
// R_RET = address of return value
TEXT countbytebody<>(SB), NOSPLIT|NOFRAME, $0-0
	// NOTE(review): symbol below may have lost the U+2215 package
	// separator (internal∕cpu) in extraction — confirm against original.
	MOVD	$internalcpu·S390X+const_offsetS390xHasVX(SB), R_TMP
	MOVD	$countbytemask<>(SB), R_MPTR
	CGIJ	$EQ, R_LEN, $0, ret0 // return if length is 0.
	SRD	$4, R_LEN, R_ITER    // R_ITER is the number of 16-byte chunks
	MOVBZ	(R_TMP), R_TMP       // load bool indicating support for vector facility
	CGIJ	$EQ, R_TMP, $0, novx // jump to scalar code if the vector facility is not available

	// Start of vector code (have vector facility).
	//
	// Set R_LEN to be the length mod 16 minus 1 to use as an index for
	// vector 'load with length' (VLL). It will be in the range [-1,14].
	// Also replicate c across a 16-byte vector and initialize V_ZERO.
	ANDW	$0xf, R_LEN
	VLVGB	$0, R_CHAR, V_CHAR // V_CHAR = [16]byte{c, 0, ..., 0, 0}
	VZERO	V_ZERO             // V_ZERO = [1]uint128{0}
	ADDW	$-1, R_LEN
	VREPB	$0, V_CHAR, V_CHAR // V_CHAR = [16]byte{c, c, ..., c, c}

	// Jump to loop if we have more than 15 bytes to process.
	CGIJ	$NE, R_ITER, $0, vxchunks

	// Load 1-15 bytes and corresponding mask.
	// Note: only the low 32-bits of R_LEN are used for the index.
	VLL	R_LEN, (R_PTR), V_VAL
	VLL	R_LEN, (R_MPTR), V_MASK

	// Compare each byte in input chunk against byte to be counted.
	// Each byte element will be set to either 0 (no match) or 1 (match).
	VCEQB	V_CHAR, V_VAL, V_VAL // each byte will be either 0xff or 0x00
	VN	V_MASK, V_VAL, V_VAL // mask out most significant 7 bits

	// Accumulate matched byte count in 128-bit integer value.
	VSUMB	V_VAL, V_ZERO, V_VAL // [16]byte{x0, x1, ..., x14, x15} -> [4]uint32{x0+x1+x2+x3, ..., x12+x13+x14+x15}
	VSUMQF	V_VAL, V_ZERO, V_CNT // [4]uint32{x0, x1, x2, x3} -> [1]uint128{x0+x1+x2+x3}

	// Return rightmost (lowest) 64-bit part of accumulator.
	VSTEG	$1, V_CNT, (R_RET)
	RET

vxchunks:
	// Load 0x01 into every byte element in the 16-byte mask vector.
	VREPIB	$1, V_MASK // V_MASK = [16]byte{1, 1, ..., 1, 1}
	VZERO	V_CNT      // initial uint128 count of 0

vxloop:
	// Load input bytes in 16-byte chunks.
	VL	(R_PTR), V_VAL

	// Compare each byte in input chunk against byte to be counted.
	// Each byte element will be set to either 0 (no match) or 1 (match).
	VCEQB	V_CHAR, V_VAL, V_VAL // each byte will be either 0xff or 0x00
	VN	V_MASK, V_VAL, V_VAL // mask out most significant 7 bits

	// Increment input string address.
	MOVD	$16(R_PTR), R_PTR

	// Accumulate matched byte count in 128-bit integer value.
	VSUMB	V_VAL, V_ZERO, V_VAL // [16]byte{x0, x1, ..., x14, x15} -> [4]uint32{x0+x1+x2+x3, ..., x12+x13+x14+x15}
	VSUMQF	V_VAL, V_ZERO, V_VAL // [4]uint32{x0, x1, x2, x3} -> [1]uint128{x0+x1+x2+x3}
	VAQ	V_VAL, V_CNT, V_CNT  // accumulate

	// Repeat until all 16-byte chunks are done.
	BRCTG	R_ITER, vxloop

	// Skip to end if there are no trailing bytes.
	CIJ	$EQ, R_LEN, $-1, vxret

	// Load 1-15 bytes and corresponding mask.
	// Note: only the low 32-bits of R_LEN are used for the index.
	VLL	R_LEN, (R_PTR), V_VAL
	VLL	R_LEN, (R_MPTR), V_MASK

	// Compare each byte in input chunk against byte to be counted.
	// Each byte element will be set to either 0 (no match) or 1 (match).
	VCEQB	V_CHAR, V_VAL, V_VAL
	VN	V_MASK, V_VAL, V_VAL

	// Accumulate matched byte count in 128-bit integer value.
	VSUMB	V_VAL, V_ZERO, V_VAL // [16]byte{x0, x1, ..., x14, x15} -> [4]uint32{x0+x1+x2+x3, ..., x12+x13+x14+x15}
	VSUMQF	V_VAL, V_ZERO, V_VAL // [4]uint32{x0, x1, x2, x3} -> [1]uint128{x0+x1+x2+x3}
	VAQ	V_VAL, V_CNT, V_CNT  // accumulate

vxret:
	// Return rightmost (lowest) 64-bit part of accumulator.
	VSTEG	$1, V_CNT, (R_RET)
	RET

novx:
	// Start of non-vector code (the vector facility not available).
	//
	// Initialise counter and constant zero.
	MOVD	$0, R_CNT
	MOVD	$0, R_ZERO

loop:
	// Read 1-byte from input and compare.
	// Note: avoid putting LOCGR in critical path.
	MOVBZ	(R_PTR), R_VAL
	MOVD	$1, R_TMP
	MOVD	$1(R_PTR), R_PTR
	CMPW	R_VAL, R_CHAR
	LOCGR	$NE, R_ZERO, R_TMP // select 0 if no match (1 if there is a match)
	ADD	R_TMP, R_CNT       // accumulate 64-bit result

	// Repeat until all bytes have been checked.
	BRCTG	R_LEN, loop

ret:
	MOVD	R_CNT, (R_RET)
	RET

ret0:
	MOVD	$0, (R_RET)
	RET

View File

@@ -0,0 +1,130 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "go_asm.h"
#include "textflag.h"
// memequal reports whether the size bytes at a and b are equal.
// memequal(a, b unsafe.Pointer, size uintptr) bool
TEXT runtime·memequal(SB),NOSPLIT,$0-13
	MOVL	a+0(FP), SI
	MOVL	b+4(FP), DI
	CMPL	SI, DI
	JEQ	eq	// identical pointers are trivially equal
	MOVL	size+8(FP), BX
	LEAL	ret+12(FP), AX
	JMP	memeqbody<>(SB)
eq:
	MOVB	$1, ret+12(FP)
	RET

// memequal_varlen is like memequal but reads the size from the closure.
// memequal_varlen(a, b unsafe.Pointer) bool
TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-9
	MOVL	a+0(FP), SI
	MOVL	b+4(FP), DI
	CMPL	SI, DI
	JEQ	eq
	MOVL	4(DX), BX	// compiler stores size at offset 4 in the closure
	LEAL	ret+8(FP), AX
	JMP	memeqbody<>(SB)
eq:
	MOVB	$1, ret+8(FP)
	RET

// memeqbody compares in 64-byte SSE chunks, then 4-byte words, with
// a page-boundary-safe path for inputs shorter than 4 bytes.
//
// a in SI
// b in DI
// count in BX
// address of result byte in AX
TEXT memeqbody<>(SB),NOSPLIT,$0-0
	CMPL	BX, $4
	JB	small

	// 64 bytes at a time using xmm registers
hugeloop:
	CMPL	BX, $64
	JB	bigloop
#ifdef GO386_softfloat
	JMP	bigloop	// no SSE registers in softfloat mode
#endif
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	MOVOU	16(SI), X2
	MOVOU	16(DI), X3
	MOVOU	32(SI), X4
	MOVOU	32(DI), X5
	MOVOU	48(SI), X6
	MOVOU	48(DI), X7
	PCMPEQB	X1, X0
	PCMPEQB	X3, X2
	PCMPEQB	X5, X4
	PCMPEQB	X7, X6
	PAND	X2, X0
	PAND	X6, X4
	PAND	X4, X0
	PMOVMSKB X0, DX
	ADDL	$64, SI
	ADDL	$64, DI
	SUBL	$64, BX
	CMPL	DX, $0xffff	// all 16 lanes equal?
	JEQ	hugeloop
	MOVB	$0, (AX)
	RET

	// 4 bytes at a time using 32-bit register
bigloop:
	CMPL	BX, $4
	JBE	leftover
	MOVL	(SI), CX
	MOVL	(DI), DX
	ADDL	$4, SI
	ADDL	$4, DI
	SUBL	$4, BX
	CMPL	CX, DX
	JEQ	bigloop
	MOVB	$0, (AX)
	RET

	// remaining 0-4 bytes
leftover:
	MOVL	-4(SI)(BX*1), CX	// overlapping tail load
	MOVL	-4(DI)(BX*1), DX
	CMPL	CX, DX
	SETEQ	(AX)
	RET

small:
	CMPL	BX, $0
	JEQ	equal

	LEAL	0(BX*8), CX	// CX = size in bits
	NEGL	CX

	MOVL	SI, DX
	CMPB	DX, $0xfc
	JA	si_high

	// load at SI won't cross a page boundary.
	MOVL	(SI), SI
	JMP	si_finish
si_high:
	// address ends in 111111xx. Load up to bytes we want, move to correct position.
	MOVL	-4(SI)(BX*1), SI
	SHRL	CX, SI
si_finish:

	// same for DI.
	MOVL	DI, DX
	CMPB	DX, $0xfc
	JA	di_high
	MOVL	(DI), DI
	JMP	di_finish
di_high:
	MOVL	-4(DI)(BX*1), DI
	SHRL	CX, DI
di_finish:

	// discard the bytes beyond BX by shifting out and back
	SUBL	SI, DI
	SHLL	CX, DI
equal:
	SETEQ	(AX)
	RET

View File

@@ -0,0 +1,165 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "go_asm.h"
#include "asm_amd64.h"
#include "textflag.h"
// memequal reports whether the size bytes at a and b are equal.
// memequal(a, b unsafe.Pointer, size uintptr) bool
TEXT runtime·memequal<ABIInternal>(SB),NOSPLIT,$0-25
	// AX = a (want in SI)
	// BX = b (want in DI)
	// CX = size (want in BX)
	CMPQ	AX, BX
	JNE	neq
	MOVQ	$1, AX	// return 1
	RET
neq:
	MOVQ	AX, SI
	MOVQ	BX, DI
	MOVQ	CX, BX
	JMP	memeqbody<>(SB)

// memequal_varlen is like memequal but reads the size from the closure.
// memequal_varlen(a, b unsafe.Pointer) bool
TEXT runtime·memequal_varlen<ABIInternal>(SB),NOSPLIT,$0-17
	// AX = a (want in SI)
	// BX = b (want in DI)
	// 8(DX) = size (want in BX)
	CMPQ	AX, BX
	JNE	neq
	MOVQ	$1, AX	// return 1
	RET
neq:
	MOVQ	AX, SI
	MOVQ	BX, DI
	MOVQ	8(DX), BX	// compiler stores size at offset 8 in the closure
	JMP	memeqbody<>(SB)

// memeqbody compares in 64-byte AVX2/SSE chunks, then 8-byte words,
// with a page-boundary-safe path for inputs shorter than 8 bytes.
//
// Input:
// a in SI
// b in DI
// count in BX
// Output:
// result in AX
TEXT memeqbody<>(SB),NOSPLIT,$0-0
	CMPQ	BX, $8
	JB	small
	CMPQ	BX, $64
	JB	bigloop
#ifndef hasAVX2
	// NOTE(review): symbol below may have lost the U+2215 package
	// separator (internal∕cpu) in extraction — confirm against original.
	CMPB	internalcpu·X86+const_offsetX86HasAVX2(SB), $1
	JE	hugeloop_avx2

	// 64 bytes at a time using xmm registers
	PCALIGN $16
hugeloop:
	CMPQ	BX, $64
	JB	bigloop
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	MOVOU	16(SI), X2
	MOVOU	16(DI), X3
	MOVOU	32(SI), X4
	MOVOU	32(DI), X5
	MOVOU	48(SI), X6
	MOVOU	48(DI), X7
	PCMPEQB	X1, X0
	PCMPEQB	X3, X2
	PCMPEQB	X5, X4
	PCMPEQB	X7, X6
	PAND	X2, X0
	PAND	X6, X4
	PAND	X4, X0
	PMOVMSKB X0, DX
	ADDQ	$64, SI
	ADDQ	$64, DI
	SUBQ	$64, BX
	CMPL	DX, $0xffff	// all 16 lanes equal?
	JEQ	hugeloop
	XORQ	AX, AX	// return 0
	RET
#endif

	// 64 bytes at a time using ymm registers
	PCALIGN $16
hugeloop_avx2:
	CMPQ	BX, $64
	JB	bigloop_avx2
	VMOVDQU	(SI), Y0
	VMOVDQU	(DI), Y1
	VMOVDQU	32(SI), Y2
	VMOVDQU	32(DI), Y3
	VPCMPEQB	Y1, Y0, Y4
	VPCMPEQB	Y2, Y3, Y5
	VPAND	Y4, Y5, Y6
	VPMOVMSKB Y6, DX
	ADDQ	$64, SI
	ADDQ	$64, DI
	SUBQ	$64, BX
	CMPL	DX, $0xffffffff	// all 32 lanes equal?
	JEQ	hugeloop_avx2
	VZEROUPPER
	XORQ	AX, AX	// return 0
	RET

bigloop_avx2:
	VZEROUPPER	// leave AVX mode before scalar tail

	// 8 bytes at a time using 64-bit register
	PCALIGN $16
bigloop:
	CMPQ	BX, $8
	JBE	leftover
	MOVQ	(SI), CX
	MOVQ	(DI), DX
	ADDQ	$8, SI
	ADDQ	$8, DI
	SUBQ	$8, BX
	CMPQ	CX, DX
	JEQ	bigloop
	XORQ	AX, AX	// return 0
	RET

	// remaining 0-8 bytes
leftover:
	MOVQ	-8(SI)(BX*1), CX	// overlapping tail load
	MOVQ	-8(DI)(BX*1), DX
	CMPQ	CX, DX
	SETEQ	AX
	RET

small:
	CMPQ	BX, $0
	JEQ	equal

	LEAQ	0(BX*8), CX	// CX = size in bits
	NEGQ	CX

	CMPB	SI, $0xf8
	JA	si_high

	// load at SI won't cross a page boundary.
	MOVQ	(SI), SI
	JMP	si_finish
si_high:
	// address ends in 11111xxx. Load up to bytes we want, move to correct position.
	MOVQ	-8(SI)(BX*1), SI
	SHRQ	CX, SI
si_finish:

	// same for DI.
	CMPB	DI, $0xf8
	JA	di_high
	MOVQ	(DI), DI
	JMP	di_finish
di_high:
	MOVQ	-8(DI)(BX*1), DI
	SHRQ	CX, DI
di_finish:

	// discard the bytes beyond BX by shifting out and back
	SUBQ	SI, DI
	SHLQ	CX, DI
equal:
	SETEQ	AX
	RET

View File

@@ -0,0 +1,91 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "go_asm.h"
#include "textflag.h"
// memequal reports whether the size bytes at a and b are equal.
// memequal(a, b unsafe.Pointer, size uintptr) bool
TEXT runtime·memequal(SB),NOSPLIT|NOFRAME,$0-13
	MOVW	a+0(FP), R0
	MOVW	b+4(FP), R2
	CMP	R0, R2
	B.EQ	eq	// identical pointers are trivially equal
	MOVW	size+8(FP), R1
	CMP	$0, R1
	B.EQ	eq	// short path to handle 0-byte case
	MOVW	$ret+12(FP), R7
	B	memeqbody<>(SB)
eq:
	MOVW	$1, R0
	MOVB	R0, ret+12(FP)
	RET

// memequal_varlen is like memequal but reads the size from the closure.
// memequal_varlen(a, b unsafe.Pointer) bool
TEXT runtime·memequal_varlen(SB),NOSPLIT|NOFRAME,$0-9
	MOVW	a+0(FP), R0
	MOVW	b+4(FP), R2
	CMP	R0, R2
	B.EQ	eq
	MOVW	4(R7), R1	// compiler stores size at offset 4 in the closure
	CMP	$0, R1
	B.EQ	eq	// short path to handle 0-byte case
	MOVW	$ret+8(FP), R7
	B	memeqbody<>(SB)
eq:
	MOVW	$1, R0
	MOVB	R0, ret+8(FP)
	RET

// memeqbody compares word-at-a-time when both inputs are 4-byte
// aligned, else byte-at-a-time.
//
// Input:
// R0: data of a
// R1: length
// R2: data of b
// R7: points to return value
//
// On exit:
// R4, R5 and R6 are clobbered
TEXT memeqbody<>(SB),NOSPLIT|NOFRAME,$0-0
	CMP	$1, R1
	B.EQ	one		// 1-byte special case for better performance
	CMP	$4, R1
	ADD	R0, R1		// R1 is the end of the range to compare
	B.LT	byte_loop	// length < 4
	AND	$3, R0, R6
	CMP	$0, R6
	B.NE	byte_loop	// unaligned a, use byte-wise compare (TODO: try to align a)
	AND	$3, R2, R6
	CMP	$0, R6
	B.NE	byte_loop	// unaligned b, use byte-wise compare
	AND	$0xfffffffc, R1, R6	// R6 = end of full 4-byte chunks
	// length >= 4
chunk4_loop:
	MOVW.P	4(R0), R4
	MOVW.P	4(R2), R5
	CMP	R4, R5
	B.NE	notequal
	CMP	R0, R6
	B.NE	chunk4_loop
	CMP	R0, R1
	B.EQ	equal		// reached the end
byte_loop:
	MOVBU.P	1(R0), R4
	MOVBU.P	1(R2), R5
	CMP	R4, R5
	B.NE	notequal
	CMP	R0, R1
	B.NE	byte_loop
equal:
	MOVW	$1, R0
	MOVB	R0, (R7)
	RET
one:
	MOVBU	(R0), R4
	MOVBU	(R2), R5
	CMP	R4, R5
	B.EQ	equal
notequal:
	MOVW	$0, R0
	MOVB	R0, (R7)
	RET

View File

@@ -0,0 +1,124 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "go_asm.h"
#include "textflag.h"
// memequal reports whether the size bytes at a and b are equal.
// memequal(a, b unsafe.Pointer, size uintptr) bool
TEXT runtime·memequal<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-25
	// short path to handle 0-byte case
	CBZ	R2, equal
	// short path to handle equal pointers
	CMP	R0, R1
	BEQ	equal
	B	memeqbody<>(SB)
equal:
	MOVD	$1, R0
	RET

// memequal_varlen is like memequal but reads the size from the closure.
// memequal_varlen(a, b unsafe.Pointer) bool
TEXT runtime·memequal_varlen<ABIInternal>(SB),NOSPLIT,$0-17
	CMP	R0, R1
	BEQ	eq
	MOVD	8(R26), R2	// compiler stores size at offset 8 in the closure
	CBZ	R2, eq
	B	memeqbody<>(SB)
eq:
	MOVD	$1, R0
	RET

// memeqbody compares in 64-byte SIMD chunks, then 16-byte GPR pairs,
// then an overlapping-load tail.
//
// input:
// R0: pointer a
// R1: pointer b
// R2: data len
// at return: result in R0
TEXT memeqbody<>(SB),NOSPLIT,$0
	CMP	$1, R2
	// handle 1-byte special case for better performance
	BEQ	one
	CMP	$16, R2
	// handle specially if length < 16
	BLO	tail
	BIC	$0x3f, R2, R3
	CBZ	R3, chunk16
	// work with 64-byte chunks
	ADD	R3, R0, R6	// end of chunks
chunk64_loop:
	VLD1.P	(R0), [V0.D2, V1.D2, V2.D2, V3.D2]
	VLD1.P	(R1), [V4.D2, V5.D2, V6.D2, V7.D2]
	VCMEQ	V0.D2, V4.D2, V8.D2
	VCMEQ	V1.D2, V5.D2, V9.D2
	VCMEQ	V2.D2, V6.D2, V10.D2
	VCMEQ	V3.D2, V7.D2, V11.D2
	VAND	V8.B16, V9.B16, V8.B16
	VAND	V8.B16, V10.B16, V8.B16
	VAND	V8.B16, V11.B16, V8.B16
	CMP	R0, R6
	VMOV	V8.D[0], R4
	VMOV	V8.D[1], R5
	CBZ	R4, not_equal	// a zero doubleword means a mismatched lane
	CBZ	R5, not_equal
	BNE	chunk64_loop
	AND	$0x3f, R2, R2
	CBZ	R2, equal
chunk16:
	// work with 16-byte chunks
	BIC	$0xf, R2, R3
	CBZ	R3, tail
	ADD	R3, R0, R6	// end of chunks
chunk16_loop:
	LDP.P	16(R0), (R4, R5)
	LDP.P	16(R1), (R7, R9)
	EOR	R4, R7
	CBNZ	R7, not_equal
	EOR	R5, R9
	CBNZ	R9, not_equal
	CMP	R0, R6
	BNE	chunk16_loop
	AND	$0xf, R2, R2
	CBZ	R2, equal
tail:
	// special compare of tail with length < 16
	TBZ	$3, R2, lt_8
	MOVD	(R0), R4
	MOVD	(R1), R5
	EOR	R4, R5
	CBNZ	R5, not_equal
	SUB	$8, R2, R6	// offset of the last 8 bytes
	MOVD	(R0)(R6), R4
	MOVD	(R1)(R6), R5
	EOR	R4, R5
	CBNZ	R5, not_equal
	B	equal
lt_8:
	TBZ	$2, R2, lt_4
	MOVWU	(R0), R4
	MOVWU	(R1), R5
	EOR	R4, R5
	CBNZ	R5, not_equal
	SUB	$4, R2, R6	// offset of the last 4 bytes
	MOVWU	(R0)(R6), R4
	MOVWU	(R1)(R6), R5
	EOR	R4, R5
	CBNZ	R5, not_equal
	B	equal
lt_4:
	TBZ	$1, R2, lt_2
	MOVHU.P	2(R0), R4
	MOVHU.P	2(R1), R5
	CMP	R4, R5
	BNE	not_equal
lt_2:
	TBZ	$0, R2, equal
one:
	MOVBU	(R0), R4
	MOVBU	(R1), R5
	CMP	R4, R5
	BNE	not_equal
equal:
	MOVD	$1, R0
	RET
not_equal:
	MOVB	ZR, R0
	RET

View File

@@ -0,0 +1,18 @@
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package bytealg
// Equal reports whether a and b
// are the same length and contain the same bytes.
// A nil argument is equivalent to an empty slice.
//
// Equal is equivalent to bytes.Equal.
// It is provided here for convenience,
// because some packages cannot depend on bytes.
func Equal(a, b []byte) bool {
	// Neither cmd/compile nor gccgo allocates for these string conversions.
	// There is a test for this in package bytes.
	// NOTE: keep this exact expression — the compilers recognize the
	// string(x) == string(y) pattern and compare without copying.
	return string(a) == string(b)
}

View File

@@ -0,0 +1,44 @@
// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "go_asm.h"
#include "textflag.h"
#define REGCTXT R29

// memequal reports whether the size bytes at a and b are equal,
// using a byte-at-a-time loop.
// memequal(a, b unsafe.Pointer, size uintptr) bool
TEXT runtime·memequal<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-25
	BEQ	R4, R5, eq	// identical pointers are trivially equal
	ADDV	R4, R6, R7	// R7 = end of a
	PCALIGN	$16
loop:
	BNE	R4, R7, test
	MOVV	$1, R4	// reached the end: equal
	RET
test:
	MOVBU	(R4), R9
	ADDV	$1, R4
	MOVBU	(R5), R10
	ADDV	$1, R5
	BEQ	R9, R10, loop

	MOVB	R0, R4	// mismatch: return false
	RET
eq:
	MOVV	$1, R4
	RET

// memequal_varlen reads the size from the closure and tail-calls
// memequal through the stack (frame holds args and result).
// memequal_varlen(a, b unsafe.Pointer) bool
TEXT runtime·memequal_varlen<ABIInternal>(SB),NOSPLIT,$40-17
	BEQ	R4, R5, eq
	MOVV	8(REGCTXT), R6	// compiler stores size at offset 8 in the closure
	MOVV	R4, 8(R3)
	MOVV	R5, 16(R3)
	MOVV	R6, 24(R3)
	JAL	runtime·memequal(SB)
	MOVBU	32(R3), R4
	RET
eq:
	MOVV	$1, R4
	RET

View File

@@ -0,0 +1,118 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build mips64 || mips64le
#include "go_asm.h"
#include "textflag.h"
#define REGCTXT R22

// memequal reports whether the size bytes at a and b are equal.
// Uses 16-byte chunks when both pointers are 8-byte aligned,
// else falls back to a byte loop.
// memequal(a, b unsafe.Pointer, size uintptr) bool
TEXT runtime·memequal(SB),NOSPLIT|NOFRAME,$0-25
	MOVV	a+0(FP), R1
	MOVV	b+8(FP), R2
	BEQ	R1, R2, eq	// identical pointers are trivially equal
	MOVV	size+16(FP), R3
	ADDV	R1, R3, R4	// R4 = end of a

	// chunk size is 16
	SGTU	$16, R3, R8
	BEQ	R0, R8, chunk_entry

byte_loop:
	BNE	R1, R4, byte_test
	MOVV	$1, R1	// reached the end: equal
	MOVB	R1, ret+24(FP)
	RET
byte_test:
	MOVBU	(R1), R6
	ADDV	$1, R1
	MOVBU	(R2), R7
	ADDV	$1, R2
	BEQ	R6, R7, byte_loop
	JMP	not_eq

chunk_entry:
	// make sure both a and b are aligned
	OR	R1, R2, R9
	AND	$0x7, R9
	BNE	R0, R9, byte_loop
	JMP	chunk_loop_1

chunk_loop:
	// chunk size is 16
	SGTU	$16, R3, R8
	BNE	R0, R8, chunk_tail_8
chunk_loop_1:
	MOVV	(R1), R6
	MOVV	(R2), R7
	BNE	R6, R7, not_eq
	MOVV	8(R1), R12
	MOVV	8(R2), R13
	ADDV	$16, R1
	ADDV	$16, R2
	SUBV	$16, R3
	BEQ	R12, R13, chunk_loop
	JMP	not_eq

chunk_tail_8:
	AND	$8, R3, R14
	BEQ	R0, R14, chunk_tail_4
	MOVV	(R1), R6
	MOVV	(R2), R7
	BNE	R6, R7, not_eq
	ADDV	$8, R1
	ADDV	$8, R2

chunk_tail_4:
	AND	$4, R3, R14
	BEQ	R0, R14, chunk_tail_2
	MOVWU	(R1), R6
	MOVWU	(R2), R7
	BNE	R6, R7, not_eq
	ADDV	$4, R1
	ADDV	$4, R2

chunk_tail_2:
	AND	$2, R3, R14
	BEQ	R0, R14, chunk_tail_1
	MOVHU	(R1), R6
	MOVHU	(R2), R7
	BNE	R6, R7, not_eq
	ADDV	$2, R1
	ADDV	$2, R2

chunk_tail_1:
	AND	$1, R3, R14
	BEQ	R0, R14, eq
	MOVBU	(R1), R6
	MOVBU	(R2), R7
	BEQ	R6, R7, eq

not_eq:
	MOVB	R0, ret+24(FP)
	RET
eq:
	MOVV	$1, R1
	MOVB	R1, ret+24(FP)
	RET

// memequal_varlen reads the size from the closure and calls
// memequal through the stack (R29 is SP).
// memequal_varlen(a, b unsafe.Pointer) bool
TEXT runtime·memequal_varlen(SB),NOSPLIT,$40-17
	MOVV	a+0(FP), R1
	MOVV	b+8(FP), R2
	BEQ	R1, R2, eq
	MOVV	8(REGCTXT), R3	// compiler stores size at offset 8 in the closure
	MOVV	R1, 8(R29)
	MOVV	R2, 16(R29)
	MOVV	R3, 24(R29)
	JAL	runtime·memequal(SB)
	MOVBU	32(R29), R1
	MOVB	R1, ret+16(FP)
	RET
eq:
	MOVV	$1, R1
	MOVB	R1, ret+16(FP)
	RET

View File

@@ -0,0 +1,62 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build mips || mipsle
#include "go_asm.h"
#include "textflag.h"
#define REGCTXT R22

// memequal reports whether the size bytes at a and b are equal,
// using a byte-at-a-time loop.
// memequal(a, b unsafe.Pointer, size uintptr) bool
TEXT runtime·memequal(SB),NOSPLIT,$0-13
	MOVW	a+0(FP), R1
	MOVW	b+4(FP), R2
	BEQ	R1, R2, eq	// identical pointers are trivially equal
	MOVW	size+8(FP), R3
	ADDU	R1, R3, R4	// R4 = end of a
loop:
	BNE	R1, R4, test
	MOVW	$1, R1	// reached the end: equal
	MOVB	R1, ret+12(FP)
	RET
test:
	MOVBU	(R1), R6
	ADDU	$1, R1
	MOVBU	(R2), R7
	ADDU	$1, R2
	BEQ	R6, R7, loop

	MOVB	R0, ret+12(FP)	// mismatch: return false
	RET
eq:
	MOVW	$1, R1
	MOVB	R1, ret+12(FP)
	RET

// memequal_varlen is like memequal but reads the size from the closure.
// memequal_varlen(a, b unsafe.Pointer) bool
TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-9
	MOVW	a+0(FP), R1
	MOVW	b+4(FP), R2
	BEQ	R1, R2, eq
	MOVW	4(REGCTXT), R3	// compiler stores size at offset 4 in the closure
	ADDU	R1, R3, R4	// R4 = end of a
loop:
	BNE	R1, R4, test
	MOVW	$1, R1
	MOVB	R1, ret+8(FP)
	RET
test:
	MOVBU	(R1), R6
	ADDU	$1, R1
	MOVBU	(R2), R7
	ADDU	$1, R2
	BEQ	R6, R7, loop

	MOVB	R0, ret+8(FP)
	RET
eq:
	MOVW	$1, R1
	MOVB	R1, ret+8(FP)
	RET

View File

@@ -0,0 +1,21 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package bytealg
import "unsafe"
// The declarations below generate ABI wrappers for functions
// implemented in assembly in this package but declared in another
// package.

// The compiler generates calls to runtime.memequal and runtime.memequal_varlen.
// In addition, the runtime calls runtime.memequal explicitly.
// Those functions are implemented in this package.

// abigen_runtime_memequal is the linkname-generated wrapper for
// runtime.memequal (implemented in assembly in this package).
//
//go:linkname abigen_runtime_memequal runtime.memequal
func abigen_runtime_memequal(a, b unsafe.Pointer, size uintptr) bool

// abigen_runtime_memequal_varlen is the linkname-generated wrapper for
// runtime.memequal_varlen (implemented in assembly in this package).
//
//go:linkname abigen_runtime_memequal_varlen runtime.memequal_varlen
func abigen_runtime_memequal_varlen(a, b unsafe.Pointer) bool

View File

@@ -0,0 +1,207 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build ppc64 || ppc64le
#include "go_asm.h"
#include "textflag.h"
// 4K (smallest case) page size offset mask for PPC64.
#define PAGE_OFFSET 4095

// Likewise, the BC opcode is hard to read, and no extended
// mnemonics are offered for these forms.
#define BGELR_CR6 BC 4, CR6LT, (LR)
#define BEQLR BC 12, CR0EQ, (LR)

// memequal reports whether the size bytes at a and b are equal.
// memequal(a, b unsafe.Pointer, size uintptr) bool
TEXT runtime·memequal<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-25
	// R3 = a
	// R4 = b
	// R5 = size
	BR	memeqbody<>(SB)

// memequal_varlen is like memequal but reads the size from the closure.
// memequal_varlen(a, b unsafe.Pointer) bool
TEXT runtime·memequal_varlen<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-17
	// R3 = a
	// R4 = b
	CMP	R3, R4
	BEQ	eq
	MOVD	8(R11), R5	// compiler stores size at offset 8 in the closure
	BR	memeqbody<>(SB)
eq:
	MOVD	$1, R3
	RET

// Do an efficient memequal for ppc64
// R3 = s1
// R4 = s2
// R5 = len
// On exit:
// R3 = return value
TEXT memeqbody<>(SB),NOSPLIT|NOFRAME,$0-0
	MOVD	R3, R8		// Move s1 into R8
	ADD	R5, R3, R9	// &s1[len(s1)]
	ADD	R5, R4, R10	// &s2[len(s2)]
	MOVD	$1, R11
	CMP	R5, $16		// Use GPR checks for check for len <= 16
	BLE	check0_16
	MOVD	$0, R3		// Assume no-match in case BGELR CR6 returns
	CMP	R5, $32		// Use overlapping VSX loads for len <= 32
	BLE	check17_32	// Do a pair of overlapping VSR compares
	CMP	R5, $64
	BLE	check33_64	// Hybrid check + overlap compare.

setup64:
	SRD	$6, R5, R6	// number of 64 byte chunks to compare
	MOVD	R6, CTR
	MOVD	$16, R14	// index for VSX loads and stores
	MOVD	$32, R15
	MOVD	$48, R16
	ANDCC	$0x3F, R5, R5	// len%64==0?

	PCALIGN	$16
loop64:
	LXVD2X	(R8+R0), V0
	LXVD2X	(R4+R0), V1
	VCMPEQUBCC	V0, V1, V2	// compare, setting CR6
	BGELR_CR6			// mismatch: return with R3 == 0
	LXVD2X	(R8+R14), V0
	LXVD2X	(R4+R14), V1
	VCMPEQUBCC	V0, V1, V2
	BGELR_CR6
	LXVD2X	(R8+R15), V0
	LXVD2X	(R4+R15), V1
	VCMPEQUBCC	V0, V1, V2
	BGELR_CR6
	LXVD2X	(R8+R16), V0
	LXVD2X	(R4+R16), V1
	VCMPEQUBCC	V0, V1, V2
	BGELR_CR6
	ADD	$64,R8		// bump up to next 64
	ADD	$64,R4
	BDNZ	loop64

	ISEL	CR0EQ, R11, R3, R3	// If no tail, return 1, otherwise R3 remains 0.
	BEQLR				// return if no tail.

	// Compare the final (overlapping) 64 bytes ending at each buffer's end.
	ADD	$-64, R9, R8
	ADD	$-64, R10, R4
	LXVD2X	(R8+R0), V0
	LXVD2X	(R4+R0), V1
	VCMPEQUBCC	V0, V1, V2
	BGELR_CR6
	LXVD2X	(R8+R14), V0
	LXVD2X	(R4+R14), V1
	VCMPEQUBCC	V0, V1, V2
	BGELR_CR6
	LXVD2X	(R8+R15), V0
	LXVD2X	(R4+R15), V1
	VCMPEQUBCC	V0, V1, V2
	BGELR_CR6
	LXVD2X	(R8+R16), V0
	LXVD2X	(R4+R16), V1
	VCMPEQUBCC	V0, V1, V2
	ISEL	CR6LT, R11, R0, R3
	RET

check33_64:
	// Bytes 0-15
	LXVD2X	(R8+R0), V0
	LXVD2X	(R4+R0), V1
	VCMPEQUBCC	V0, V1, V2
	BGELR_CR6
	ADD	$16, R8
	ADD	$16, R4

	// Bytes 16-31
	LXVD2X	(R8+R0), V0
	LXVD2X	(R4+R0), V1
	VCMPEQUBCC	V0, V1, V2
	BGELR_CR6

	// A little tricky, but point R4,R8 to &sx[len-32],
	// and reuse check17_32 to check the next 1-31 bytes (with some overlap)
	ADD	$-32, R9, R8
	ADD	$-32, R10, R4
	// Fallthrough

check17_32:
	LXVD2X	(R8+R0), V0
	LXVD2X	(R4+R0), V1
	VCMPEQUBCC	V0, V1, V2
	ISEL	CR6LT, R11, R0, R5

	// Load sX[len(sX)-16:len(sX)] and compare.
	ADD	$-16, R9
	ADD	$-16, R10
	LXVD2X	(R9+R0), V0
	LXVD2X	(R10+R0), V1
	VCMPEQUBCC	V0, V1, V2
	ISEL	CR6LT, R5, R0, R3	// result = both halves matched
	RET

check0_16:
#ifdef GOPPC64_power10
	SLD	$56, R5, R7
	LXVL	R8, R7, V0	// load-with-length: reads exactly R5 bytes
	LXVL	R4, R7, V1
	VCMPEQUDCC	V0, V1, V2
	ISEL	CR6LT, R11, R0, R3
	RET
#else
	CMP	R5, $8
	BLT	check0_7
	// Load sX[0:7] and compare.
	MOVD	(R8), R6
	MOVD	(R4), R7
	CMP	R6, R7
	ISEL	CR0EQ, R11, R0, R5
	// Load sX[len(sX)-8:len(sX)] and compare.
	MOVD	-8(R9), R6
	MOVD	-8(R10), R7
	CMP	R6, R7
	ISEL	CR0EQ, R5, R0, R3
	RET

check0_7:
	CMP	R5,$0
	MOVD	$1, R3
	BEQLR		// return if len == 0

	// Check < 8B loads with a single compare, but select the load address
	// such that it cannot cross a page boundary. Load a few bytes from the
	// lower address if that does not cross the lower page. Or, load a few
	// extra bytes from the higher addresses. And align those values
	// consistently in register as either address may have differing
	// alignment requirements.
	ANDCC	$PAGE_OFFSET, R8, R6	// &sX & PAGE_OFFSET
	ANDCC	$PAGE_OFFSET, R4, R9
	SUBC	R5, $8, R12		// 8-len
	SLD	$3, R12, R14		// (8-len)*8
	CMPU	R6, R12, CR1		// Enough bytes lower in the page to load lower?
	CMPU	R9, R12, CR0
	SUB	R12, R8, R6		// compute lower load address
	SUB	R12, R4, R9
	ISEL	CR1LT, R8, R6, R8	// R8 = R6 < 0 ? R8 (&s1) : R6 (&s1 - (8-len))
	ISEL	CR0LT, R4, R9, R4	// Similar for s2
	MOVD	(R8), R15
	MOVD	(R4), R16
	SLD	R14, R15, R7
	SLD	R14, R16, R17
	SRD	R14, R7, R7		// Clear the upper (8-len) bytes (with 2 shifts)
	SRD	R14, R17, R17
	SRD	R14, R15, R6		// Clear the lower (8-len) bytes
	SRD	R14, R16, R9
#ifdef GOARCH_ppc64le
	ISEL	CR1LT, R7, R6, R8	// Choose the correct len bytes to compare based on alignment
	ISEL	CR0LT, R17, R9, R4
#else
	ISEL	CR1LT, R6, R7, R8
	ISEL	CR0LT, R9, R17, R4
#endif
	CMP	R4, R8
	ISEL	CR0EQ, R11, R0, R3
	RET
#endif	// tail processing if !defined(GOPPC64_power10)

View File

@@ -0,0 +1,126 @@
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "go_asm.h"
#include "textflag.h"
#define CTXT S10
// func memequal(a, b unsafe.Pointer, size uintptr) bool
TEXT runtime·memequal<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-25
// X10 = a_base
// X11 = b_base
// X12 = size
JMP memequal<>(SB)
// func memequal_varlen(a, b unsafe.Pointer) bool
// Closure-called variant of memequal: the compare length is not an
// argument but is read from the calling closure.
TEXT runtime·memequal_varlen<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-17
MOV 8(CTXT), X12 // compiler stores size at offset 8 in the closure
// X10 = a_base
// X11 = b_base
JMP memequal<>(SB)
// On entry X10 and X11 contain pointers, X12 contains length.
// For non-regabi X13 contains address for return value.
// For regabi return value in X10.
//
// Strategy: pointers that are equal or lengths < 32 go straight to the
// small loops. Otherwise, if both pointers share the same 8-byte
// misalignment, compare byte-wise up to an 8-byte boundary, then run
// 32-byte and 16-byte word-compare loops, finishing with 4-byte and
// 1-byte loops for the remainder.
TEXT memequal<>(SB),NOSPLIT|NOFRAME,$0
BEQ X10, X11, eq
MOV $32, X23
BLT X12, X23, loop4_check
// Check alignment - if alignment differs we have to do one byte at a time.
AND $7, X10, X9
AND $7, X11, X19
BNE X9, X19, loop4_check
BEQZ X9, loop32_check
// Check one byte at a time until we reach 8 byte alignment.
SUB X9, X0, X9 // X9 = -(a & 7); X0 is the zero register
ADD $8, X9, X9 // X9 = 8 - (a & 7): bytes needed to reach alignment
SUB X9, X12, X12 // account for the alignment bytes in the length
align:
SUB $1, X9
MOVBU 0(X10), X19
MOVBU 0(X11), X20
BNE X19, X20, not_eq
ADD $1, X10
ADD $1, X11
BNEZ X9, align
loop32_check:
MOV $32, X9
BLT X12, X9, loop16_check
loop32:
MOV 0(X10), X19
MOV 0(X11), X20
MOV 8(X10), X21
MOV 8(X11), X22
BNE X19, X20, not_eq
BNE X21, X22, not_eq
MOV 16(X10), X14
MOV 16(X11), X15
MOV 24(X10), X16
MOV 24(X11), X17
BNE X14, X15, not_eq
BNE X16, X17, not_eq
ADD $32, X10
ADD $32, X11
SUB $32, X12
BGE X12, X9, loop32
BEQZ X12, eq
loop16_check:
MOV $16, X23
BLT X12, X23, loop4_check
loop16:
MOV 0(X10), X19
MOV 0(X11), X20
MOV 8(X10), X21
MOV 8(X11), X22
BNE X19, X20, not_eq
BNE X21, X22, not_eq
ADD $16, X10
ADD $16, X11
SUB $16, X12
BGE X12, X23, loop16
BEQZ X12, eq
loop4_check:
MOV $4, X23
BLT X12, X23, loop1
loop4:
MOVBU 0(X10), X19
MOVBU 0(X11), X20
MOVBU 1(X10), X21
MOVBU 1(X11), X22
BNE X19, X20, not_eq
BNE X21, X22, not_eq
MOVBU 2(X10), X14
MOVBU 2(X11), X15
MOVBU 3(X10), X16
MOVBU 3(X11), X17
BNE X14, X15, not_eq
BNE X16, X17, not_eq
ADD $4, X10
ADD $4, X11
SUB $4, X12
BGE X12, X23, loop4
loop1:
BEQZ X12, eq
MOVBU 0(X10), X19
MOVBU 0(X11), X20
BNE X19, X20, not_eq
ADD $1, X10
ADD $1, X11
SUB $1, X12
JMP loop1
not_eq:
MOVB ZERO, X10 // return false
RET
eq:
MOV $1, X10 // return true
RET

View File

@@ -0,0 +1,92 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "go_asm.h"
#include "textflag.h"
// memequal(a, b unsafe.Pointer, size uintptr) bool
// Stack-based ABI: loads arguments from the frame and passes the
// address of the result byte to the shared body in R7.
TEXT runtime·memequal(SB),NOSPLIT|NOFRAME,$0-25
MOVD a+0(FP), R3
MOVD b+8(FP), R5
MOVD size+16(FP), R6
LA ret+24(FP), R7
BR memeqbody<>(SB)
// memequal_varlen(a, b unsafe.Pointer) bool
// Closure-called variant: the compare length comes from the closure
// context (R12 is the closure/context register on s390x).
TEXT runtime·memequal_varlen(SB),NOSPLIT|NOFRAME,$0-17
MOVD a+0(FP), R3
MOVD b+8(FP), R5
MOVD 8(R12), R6 // compiler stores size at offset 8 in the closure
LA ret+16(FP), R7
BR memeqbody<>(SB)
// input:
// R3 = a
// R5 = b
// R6 = len
// R7 = address of output byte (stores 0 or 1 here)
// a and b have the same length
//
// Large inputs are compared 256 bytes at a time with CLC. A tail of
// 32..255 bytes is compared with a single length-patched CLC executed
// via EXRL. Inputs under 32 bytes use plain register compares, with
// R2 tracking the current byte offset.
TEXT memeqbody<>(SB),NOSPLIT|NOFRAME,$0-0
CMPBEQ R3, R5, equal // same pointer: trivially equal
loop:
CMPBEQ R6, $0, equal
CMPBLT R6, $32, tiny
CMP R6, $256
BLT tail
CLC $256, 0(R3), 0(R5) // compare a 256-byte chunk in one instruction
BNE notequal
SUB $256, R6
LA 256(R3), R3
LA 256(R5), R5
BR loop
tail:
SUB $1, R6, R8 // CLC length field encodes n-1 bytes
EXRL $memeqbodyclc<>(SB), R8 // execute the CLC template with length R8
BEQ equal
notequal:
MOVB $0, 0(R7)
RET
equal:
MOVB $1, 0(R7)
RET
tiny:
MOVD $0, R2 // R2 = running byte offset into a and b
CMPBLT R6, $16, lt16
MOVD 0(R3), R8
MOVD 0(R5), R9
CMPBNE R8, R9, notequal
MOVD 8(R3), R8
MOVD 8(R5), R9
CMPBNE R8, R9, notequal
LA 16(R2), R2
SUB $16, R6
lt16:
CMPBLT R6, $8, lt8
MOVD 0(R3)(R2*1), R8
MOVD 0(R5)(R2*1), R9
CMPBNE R8, R9, notequal
LA 8(R2), R2
SUB $8, R6
lt8:
CMPBLT R6, $4, lt4
MOVWZ 0(R3)(R2*1), R8
MOVWZ 0(R5)(R2*1), R9
CMPBNE R8, R9, notequal
LA 4(R2), R2
SUB $4, R6
lt4:
// Compare the final 0..3 bytes one at a time, exiting as soon as
// the remaining length is exhausted.
#define CHECK(n) \
CMPBEQ R6, $n, equal \
MOVB n(R3)(R2*1), R8 \
MOVB n(R5)(R2*1), R9 \
CMPBNE R8, R9, notequal
CHECK(0)
CHECK(1)
CHECK(2)
CHECK(3)
BR equal
// CLC template for EXRL in memeqbody<>: the executing EXRL replaces
// the length field, so the $1 here is a placeholder, never used as-is.
TEXT memeqbodyclc<>(SB),NOSPLIT|NOFRAME,$0-0
CLC $1, 0(R3), 0(R5)
RET

View File

@@ -0,0 +1,77 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "go_asm.h"
#include "textflag.h"
// memequal(p, q unsafe.Pointer, size uintptr) bool
// Pushes the three arguments for memeqbody and stores its i64 result
// (0 or 1) into the result slot as a single byte.
TEXT runtime·memequal(SB), NOSPLIT, $0-25
Get SP
I64Load a+0(FP)
I64Load b+8(FP)
I64Load size+16(FP)
Call memeqbody<>(SB)
I64Store8 ret+24(FP)
RET
// memequal_varlen(a, b unsafe.Pointer) bool
// Closure-called variant: the compare length is read from the closure
// context rather than passed as an argument.
TEXT runtime·memequal_varlen(SB), NOSPLIT, $0-17
Get SP
I64Load a+0(FP)
I64Load b+8(FP)
I64Load 8(CTXT) // compiler stores size at offset 8 in the closure
Call memeqbody<>(SB)
I64Store8 ret+16(FP)
RET
// params: a, b, len
// ret: 0/1
//
// R0 = a, R1 = b, R2 = remaining length. Wasm has no vector compare
// here, so this is a straightforward byte-at-a-time loop: equal
// pointers short-circuit to 1, the first differing byte returns 0,
// and exhausting the length returns 1.
TEXT memeqbody<>(SB), NOSPLIT, $0-0
Get R0
Get R1
I64Eq
If
I64Const $1
Return
End
loop:
Loop
Get R2
I64Eqz // all bytes consumed without a mismatch
If
I64Const $1
Return
End
Get R0
I32WrapI64
I64Load8U $0
Get R1
I32WrapI64
I64Load8U $0
I64Ne
If
I64Const $0
Return
End
Get R0
I64Const $1
I64Add
Set R0
Get R1
I64Const $1
I64Add
Set R1
Get R2
I64Const $1
I64Sub
Set R2
Br loop
End
UNDEF // unreachable: the loop always returns

View File

@@ -0,0 +1,26 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package bytealg
import "internal/cpu"
const MaxBruteForce = 64
func init() {
if cpu.X86.HasAVX2 {
MaxLen = 63
} else {
MaxLen = 31
}
}
// Cutover reports the number of failures of IndexByte we should tolerate
// before switching over to Index.
// n is the number of bytes processed so far.
// See the bytes.Index implementation for details.
func Cutover(n int) int {
	// Tolerate one failure per 8 bytes scanned, plus two failures of
	// slop to start with: (n+16)/8 == n/8 + 2 for n >= 0.
	const slop = 16
	budget := n + slop
	return budget / 8
}

View File

@@ -0,0 +1,278 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "go_asm.h"
#include "textflag.h"
// func Index(a, b []byte) int
// Loads the slice headers into the registers indexbody expects and
// tail-calls the shared body. R10 keeps the original text start so the
// body can convert a match pointer back into an index; R11 is where
// the result is stored.
TEXT ·Index(SB),NOSPLIT,$0-56
MOVQ a_base+0(FP), DI
MOVQ a_len+8(FP), DX
MOVQ b_base+24(FP), R8
MOVQ b_len+32(FP), AX
MOVQ DI, R10
LEAQ ret+48(FP), R11
JMP indexbody<>(SB)
// func IndexString(a, b string) int
// Same register setup as ·Index, adjusted for string headers (no cap
// word, so b starts at offset 16 instead of 24).
TEXT ·IndexString(SB),NOSPLIT,$0-40
MOVQ a_base+0(FP), DI
MOVQ a_len+8(FP), DX
MOVQ b_base+16(FP), R8
MOVQ b_len+24(FP), AX
MOVQ DI, R10
LEAQ ret+32(FP), R11
JMP indexbody<>(SB)
// AX: length of string, that we are searching for
// DX: length of string, in which we are searching
// DI: pointer to string, in which we are searching
// R8: pointer to string, that we are searching for
// R11: address, where to put return value
// Note: We want len in DX and AX, because PCMPESTRI implicitly consumes them
//
// Strategy: dispatch on len(sep). Each size class preloads the
// separator into registers (or XMM/YMM) once, then slides a window
// over the text one byte at a time; sizes that are not a power of two
// use two overlapping loads (prefix + suffix). Text of >= 16 bytes
// with sep of >= 12 bytes instead uses PCMPESTRI, advancing
// 16-len(sep) bytes per compare. The result stored at (R11) is the
// match offset, or -1.
TEXT indexbody<>(SB),NOSPLIT,$0
CMPQ AX, DX
JA fail // separator longer than the text: cannot match
CMPQ DX, $16
JAE sse42 // text >= 16 bytes: consider the PCMPESTRI path
no_sse42:
CMPQ AX, $2
JA _3_or_more
MOVW (R8), R8 // R8 = the 2-byte separator value
LEAQ -1(DI)(DX*1), DX // DX = one past the last valid start address
PCALIGN $16
loop2:
MOVW (DI), SI
CMPW SI,R8
JZ success
ADDQ $1,DI
CMPQ DI,DX
JB loop2
JMP fail
_3_or_more:
CMPQ AX, $3
JA _4_or_more
MOVW 1(R8), BX // BX = last 2 bytes of sep (overlaps first 2)
MOVW (R8), R8
LEAQ -2(DI)(DX*1), DX
loop3:
MOVW (DI), SI
CMPW SI,R8
JZ partial_success3
ADDQ $1,DI
CMPQ DI,DX
JB loop3
JMP fail
partial_success3:
MOVW 1(DI), SI
CMPW SI,BX
JZ success
ADDQ $1,DI
CMPQ DI,DX
JB loop3
JMP fail
_4_or_more:
CMPQ AX, $4
JA _5_or_more
MOVL (R8), R8
LEAQ -3(DI)(DX*1), DX
loop4:
MOVL (DI), SI
CMPL SI,R8
JZ success
ADDQ $1,DI
CMPQ DI,DX
JB loop4
JMP fail
_5_or_more:
CMPQ AX, $7
JA _8_or_more
LEAQ 1(DI)(DX*1), DX
SUBQ AX, DX
MOVL -4(R8)(AX*1), BX // BX = last 4 bytes of sep (overlapping load)
MOVL (R8), R8
loop5to7:
MOVL (DI), SI
CMPL SI,R8
JZ partial_success5to7
ADDQ $1,DI
CMPQ DI,DX
JB loop5to7
JMP fail
partial_success5to7:
MOVL -4(AX)(DI*1), SI
CMPL SI,BX
JZ success
ADDQ $1,DI
CMPQ DI,DX
JB loop5to7
JMP fail
_8_or_more:
CMPQ AX, $8
JA _9_or_more
MOVQ (R8), R8
LEAQ -7(DI)(DX*1), DX
loop8:
MOVQ (DI), SI
CMPQ SI,R8
JZ success
ADDQ $1,DI
CMPQ DI,DX
JB loop8
JMP fail
_9_or_more:
CMPQ AX, $15
JA _16_or_more
LEAQ 1(DI)(DX*1), DX
SUBQ AX, DX
MOVQ -8(R8)(AX*1), BX // BX = last 8 bytes of sep (overlapping load)
MOVQ (R8), R8
loop9to15:
MOVQ (DI), SI
CMPQ SI,R8
JZ partial_success9to15
ADDQ $1,DI
CMPQ DI,DX
JB loop9to15
JMP fail
partial_success9to15:
MOVQ -8(AX)(DI*1), SI
CMPQ SI,BX
JZ success
ADDQ $1,DI
CMPQ DI,DX
JB loop9to15
JMP fail
_16_or_more:
CMPQ AX, $16
JA _17_or_more
MOVOU (R8), X1
LEAQ -15(DI)(DX*1), DX
loop16:
MOVOU (DI), X2
PCMPEQB X1, X2
PMOVMSKB X2, SI
CMPQ SI, $0xffff // all 16 byte lanes equal?
JE success
ADDQ $1,DI
CMPQ DI,DX
JB loop16
JMP fail
_17_or_more:
CMPQ AX, $31
JA _32_or_more
LEAQ 1(DI)(DX*1), DX
SUBQ AX, DX
MOVOU -16(R8)(AX*1), X0 // X0 = last 16 bytes of sep (overlapping load)
MOVOU (R8), X1
loop17to31:
MOVOU (DI), X2
PCMPEQB X1,X2
PMOVMSKB X2, SI
CMPQ SI, $0xffff
JE partial_success17to31
ADDQ $1,DI
CMPQ DI,DX
JB loop17to31
JMP fail
partial_success17to31:
MOVOU -16(AX)(DI*1), X3
PCMPEQB X0, X3
PMOVMSKB X3, SI
CMPQ SI, $0xffff
JE success
ADDQ $1,DI
CMPQ DI,DX
JB loop17to31
JMP fail
// We can get here only when AVX2 is enabled and cutoff for indexShortStr is set to 63
// So no need to check cpuid
_32_or_more:
CMPQ AX, $32
JA _33_to_63
VMOVDQU (R8), Y1
LEAQ -31(DI)(DX*1), DX
loop32:
VMOVDQU (DI), Y2
VPCMPEQB Y1, Y2, Y3
VPMOVMSKB Y3, SI
CMPL SI, $0xffffffff // all 32 byte lanes equal?
JE success_avx2
ADDQ $1,DI
CMPQ DI,DX
JB loop32
JMP fail_avx2
_33_to_63:
LEAQ 1(DI)(DX*1), DX
SUBQ AX, DX
VMOVDQU -32(R8)(AX*1), Y0 // Y0 = last 32 bytes of sep (overlapping load)
VMOVDQU (R8), Y1
loop33to63:
VMOVDQU (DI), Y2
VPCMPEQB Y1, Y2, Y3
VPMOVMSKB Y3, SI
CMPL SI, $0xffffffff
JE partial_success33to63
ADDQ $1,DI
CMPQ DI,DX
JB loop33to63
JMP fail_avx2
partial_success33to63:
VMOVDQU -32(AX)(DI*1), Y3
VPCMPEQB Y0, Y3, Y4
VPMOVMSKB Y4, SI
CMPL SI, $0xffffffff
JE success_avx2
ADDQ $1,DI
CMPQ DI,DX
JB loop33to63
fail_avx2:
VZEROUPPER // clear upper YMM state before returning to SSE code
fail:
MOVQ $-1, (R11)
RET
success_avx2:
VZEROUPPER
JMP success
sse42:
#ifndef hasSSE42
CMPB internalcpu·X86+const_offsetX86HasSSE42(SB), $1
JNE no_sse42
#endif
CMPQ AX, $12
// PCMPESTRI is slower than normal compare,
// so using it makes sense only if we advance 4+ bytes per compare
// This value was determined experimentally and is the ~same
// on Nehalem (first with SSE42) and Haswell.
JAE _9_or_more
LEAQ 16(R8), SI
TESTW $0xff0, SI
// If R8+16 lands within the first 16 bytes of a 4K page, the 16-byte
// load of sep below could cross into an unmapped page; take the
// scalar path instead.
JEQ no_sse42
MOVOU (R8), X1
LEAQ -15(DI)(DX*1), SI
MOVQ $16, R9
SUBQ AX, R9 // We advance by 16-len(sep) each iteration, so precalculate it into R9
PCALIGN $16
loop_sse42:
// 0x0c means: unsigned byte compare (bits 0,1 are 00)
// for equality (bits 2,3 are 11)
// result is not masked or inverted (bits 4,5 are 00)
// and corresponds to first matching byte (bit 6 is 0)
PCMPESTRI $0x0c, (DI), X1
// CX == 16 means no match,
// CX > R9 means partial match at the end of the string,
// otherwise sep is at offset CX from X1 start
CMPQ CX, R9
JBE sse42_success
ADDQ R9, DI
CMPQ DI, SI
JB loop_sse42
PCMPESTRI $0x0c, -1(SI), X1 // final compare on the last full window
CMPQ CX, R9
JA fail
LEAQ -1(SI), DI
sse42_success:
ADDQ CX, DI
success:
SUBQ R10, DI // convert match pointer to index from text start
MOVQ DI, (R11)
RET
View File

@@ -0,0 +1,23 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package bytealg
// Empirical data shows that using Index can get better
// performance when len(s) <= 16.
const MaxBruteForce = 16
// init caps MaxLen at 32 bytes — the longest separator handled by the
// arm64 assembly Index implementation.
func init() {
	const maxSepLen = 32
	MaxLen = maxSepLen
}
// Cutover reports the number of failures of IndexByte we should tolerate
// before switching over to Index.
// n is the number of bytes processed so far.
// See the bytes.Index implementation for details.
func Cutover(n int) int {
	// Tolerate one failure per 16 bytes scanned, starting with a
	// slop of 4.
	failures := n >> 4
	return failures + 4
}

View File

@@ -0,0 +1,206 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "go_asm.h"
#include "textflag.h"
// func Index(a, b []byte) int
// Loads the slice headers into the registers indexbody expects and
// branches to the shared body; R9 holds the result address.
TEXT ·Index(SB),NOSPLIT,$0-56
MOVD a_base+0(FP), R0
MOVD a_len+8(FP), R1
MOVD b_base+24(FP), R2
MOVD b_len+32(FP), R3
MOVD $ret+48(FP), R9
B indexbody<>(SB)
// func IndexString(a, b string) int
// Same as ·Index but with string headers (no cap word, so b starts at
// offset 16 instead of 24).
TEXT ·IndexString(SB),NOSPLIT,$0-40
MOVD a_base+0(FP), R0
MOVD a_len+8(FP), R1
MOVD b_base+16(FP), R2
MOVD b_len+24(FP), R3
MOVD $ret+32(FP), R9
B indexbody<>(SB)
// input:
// R0: haystack
// R1: length of haystack
// R2: needle
// R3: length of needle (2 <= len <= 32)
// R9: address to put result
//
// The needle is preloaded into registers once per size class; the
// loops then advance R0 one byte at a time with post-increment loads
// and compare against the preloaded pieces. R4 holds the last valid
// start address; moving past it means not found.
TEXT indexbody<>(SB),NOSPLIT,$0-56
// main idea is to load 'sep' into separate register(s)
// to avoid repeatedly re-load it again and again
// for subsequent substring comparisons
SUB R3, R1, R4
// R4 contains the start of last substring for comparison
ADD R0, R4, R4
ADD $1, R0, R8 // R8 = &s[0]+1, used to compute the index in found
CMP $8, R3
BHI greater_8
TBZ $3, R3, len_2_7 // dispatch on the bits of len(sep)
len_8:
// R5 contains 8-byte of sep
MOVD (R2), R5
loop_8:
// R6 contains substring for comparison
CMP R4, R0
BHI not_found
MOVD.P 1(R0), R6
CMP R5, R6
BNE loop_8
B found
len_2_7:
TBZ $2, R3, len_2_3
TBZ $1, R3, len_4_5
TBZ $0, R3, len_6
len_7:
// R5 and R6 contain 7-byte of sep
MOVWU (R2), R5
// 1-byte overlap with R5
MOVWU 3(R2), R6
loop_7:
CMP R4, R0
BHI not_found
MOVWU.P 1(R0), R3
CMP R5, R3
BNE loop_7
MOVWU 2(R0), R3
CMP R6, R3
BNE loop_7
B found
len_6:
// R5 and R6 contain 6-byte of sep
MOVWU (R2), R5
MOVHU 4(R2), R6
loop_6:
CMP R4, R0
BHI not_found
MOVWU.P 1(R0), R3
CMP R5, R3
BNE loop_6
MOVHU 3(R0), R3
CMP R6, R3
BNE loop_6
B found
len_4_5:
TBZ $0, R3, len_4
len_5:
// R5 and R7 contain 5-byte of sep
MOVWU (R2), R5
MOVBU 4(R2), R7
loop_5:
CMP R4, R0
BHI not_found
MOVWU.P 1(R0), R3
CMP R5, R3
BNE loop_5
MOVBU 3(R0), R3
CMP R7, R3
BNE loop_5
B found
len_4:
// R5 contains 4-byte of sep
MOVWU (R2), R5
loop_4:
CMP R4, R0
BHI not_found
MOVWU.P 1(R0), R6
CMP R5, R6
BNE loop_4
B found
len_2_3:
TBZ $0, R3, len_2
len_3:
// R6 and R7 contain 3-byte of sep
MOVHU (R2), R6
MOVBU 2(R2), R7
loop_3:
CMP R4, R0
BHI not_found
MOVHU.P 1(R0), R3
CMP R6, R3
BNE loop_3
MOVBU 1(R0), R3
CMP R7, R3
BNE loop_3
B found
len_2:
// R5 contains 2-byte of sep
MOVHU (R2), R5
loop_2:
CMP R4, R0
BHI not_found
MOVHU.P 1(R0), R6
CMP R5, R6
BNE loop_2
found:
// R0 was post-incremented past the match start and R8 = &s[0]+1,
// so R0-R8 is the match index.
SUB R8, R0, R0
MOVD R0, (R9)
RET
not_found:
MOVD $-1, R0
MOVD R0, (R9)
RET
greater_8:
SUB $9, R3, R11 // len(sep) - 9, offset of R0 for last 8 bytes
CMP $16, R3
BHI greater_16
len_9_16:
MOVD.P 8(R2), R5 // R5 contains the first 8-byte of sep
SUB $16, R3, R7 // len(sep) - 16, offset of R2 for last 8 bytes
MOVD (R2)(R7), R6 // R6 contains the last 8-byte of sep
loop_9_16:
// search the first 8 bytes first
CMP R4, R0
BHI not_found
MOVD.P 1(R0), R7
CMP R5, R7
BNE loop_9_16
MOVD (R0)(R11), R7
CMP R6, R7 // compare the last 8 bytes
BNE loop_9_16
B found
greater_16:
CMP $24, R3
BHI len_25_32
len_17_24:
LDP.P 16(R2), (R5, R6) // R5 and R6 contain the first 16-byte of sep
SUB $24, R3, R10 // len(sep) - 24
MOVD (R2)(R10), R7 // R7 contains the last 8-byte of sep
loop_17_24:
// search the first 16 bytes first
CMP R4, R0
BHI not_found
MOVD.P 1(R0), R10
CMP R5, R10
BNE loop_17_24
MOVD 7(R0), R10
CMP R6, R10
BNE loop_17_24
MOVD (R0)(R11), R10
CMP R7, R10 // compare the last 8 bytes
BNE loop_17_24
B found
len_25_32:
LDP.P 16(R2), (R5, R6)
MOVD.P 8(R2), R7 // R5, R6 and R7 contain the first 24-byte of sep
SUB $32, R3, R12 // len(sep) - 32
MOVD (R2)(R12), R10 // R10 contains the last 8-byte of sep
loop_25_32:
// search the first 24 bytes first
CMP R4, R0
BHI not_found
MOVD.P 1(R0), R12
CMP R5, R12
BNE loop_25_32
MOVD 7(R0), R12
CMP R6, R12
BNE loop_25_32
MOVD 15(R0), R12
CMP R7, R12
BNE loop_25_32
MOVD (R0)(R11), R12
CMP R10, R12 // compare the last 8 bytes
BNE loop_25_32
B found

View File

@@ -0,0 +1,29 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build !amd64 && !arm64 && !s390x && !ppc64le && !ppc64
package bytealg
const MaxBruteForce = 0
// Index returns the index of the first instance of b in a, or -1 if b is not present in a.
// Requires 2 <= len(b) <= MaxLen.
//
// NOTE(review): this build has no assembly Index and no init raising
// MaxLen, so callers gated on MaxLen are presumably never routed here
// — confirm against bytes/strings before relying on the panic.
func Index(a, b []byte) int {
panic("unimplemented")
}
// IndexString returns the index of the first instance of b in a, or -1 if b is not present in a.
// Requires 2 <= len(b) <= MaxLen.
//
// NOTE(review): like Index above, presumably unreachable on these
// platforms because MaxLen is never raised here — confirm at callers.
func IndexString(a, b string) int {
panic("unimplemented")
}
// Cutover reports the number of failures of IndexByte we should tolerate
// before switching over to Index.
// n is the number of bytes processed so far.
// See the bytes.Index implementation for details.
//
// NOTE(review): only meaningful when Index is implemented; on these
// platforms it panics like Index/IndexString above.
func Cutover(n int) int {
panic("unimplemented")
}

View File

@@ -0,0 +1,19 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build amd64 || arm64 || s390x || ppc64le || ppc64
package bytealg
// Index returns the index of the first instance of b in a, or -1 if b is not present in a.
// Requires 2 <= len(b) <= MaxLen.
//
// Bodyless declaration: implemented in per-architecture assembly for
// the platforms listed in this file's build constraint.
//
//go:noescape
func Index(a, b []byte) int
// IndexString returns the index of the first instance of b in a, or -1 if b is not present in a.
// Requires 2 <= len(b) <= MaxLen.
//
// Bodyless declaration: implemented in per-architecture assembly for
// the platforms listed in this file's build constraint.
//
//go:noescape
func IndexString(a, b string) int

View File

@@ -0,0 +1,26 @@
// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build ppc64 || ppc64le
package bytealg
import "internal/cpu"
const MaxBruteForce = 16
var SupportsPower9 = cpu.PPC64.IsPOWER9
// init caps MaxLen at 32 bytes — the longest separator handled by the
// ppc64x assembly Index implementation.
func init() {
	const maxSepLen = 32
	MaxLen = maxSepLen
}
// Cutover reports the number of failures of IndexByte we should tolerate
// before switching over to Index.
// n is the number of bytes processed so far.
// See the bytes.Index implementation for details.
func Cutover(n int) int {
	// One tolerated failure per 8 bytes scanned, plus a couple of
	// failures of slop to start.
	const (
		slop           = 16
		bytesPerMiss   = 8
	)
	return (n + slop) / bytesPerMiss
}

View File

@@ -0,0 +1,841 @@
// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// This is an implementation based on the s390x
// implementation.
// Find a separator with 2 <= len <= 32 within a string.
// Separators with lengths of 2, 3 or 4 are handled
// specially.
// This works on power8 and above. The loads and
// compares are done in big endian order
// since that allows the used of VCLZD, and allows
// the same implementation to work on big and little
// endian platforms with minimal conditional changes.
// NOTE: There is a power9 implementation that
// improves performance by 10-15% on little
// endian for some of the benchmarks.
// Unrolled index2to16 loop by 4 on ppc64le/power9
// Work is still needed for a big endian
// implementation on power9.
//go:build ppc64 || ppc64le
#include "go_asm.h"
#include "textflag.h"
// Needed to swap LXVD2X loads to the correct
// byte order to work on POWER8.
#ifdef GOARCH_ppc64
DATA byteswap<>+0(SB)/8, $0x0001020304050607
DATA byteswap<>+8(SB)/8, $0x08090a0b0c0d0e0f
#else
DATA byteswap<>+0(SB)/8, $0x0706050403020100
DATA byteswap<>+8(SB)/8, $0x0f0e0d0c0b0a0908
#endif
// Load bytes in big endian order. Address
// alignment does not need checking.
#define VLOADSWAP(base, index, vreg, vsreg) \
LXVD2X (base)(index), vsreg; \
VPERM vreg, vreg, SWAP, vreg
GLOBL byteswap<>+0(SB), RODATA, $16
// func Index(a, b []byte) int
// Shuffles the ABIInternal slice-header registers into the layout the
// shared bodies expect, then dispatches: on ppc64le with POWER9
// detected at run time, use the POWER9-tuned body; otherwise the
// POWER8 body.
TEXT ·Index<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-56
// R3 = byte array pointer
// R4 = length
MOVD R6, R5 // R5 = separator pointer
MOVD R7, R6 // R6 = separator length
#ifdef GOARCH_ppc64le
MOVBZ internalcpu·PPC64+const_offsetPPC64HasPOWER9(SB), R7
CMP R7, $1
BNE power8
BR indexbodyp9<>(SB)
#endif
power8:
BR indexbody<>(SB)
// func IndexString(a, b string) int
// String headers already arrive in the registers the shared bodies
// expect; only the POWER9/POWER8 dispatch is needed.
TEXT ·IndexString<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-40
// R3 = string
// R4 = length
// R5 = separator pointer
// R6 = separator length
#ifdef GOARCH_ppc64le
MOVBZ internalcpu·PPC64+const_offsetPPC64HasPOWER9(SB), R7
CMP R7, $1
BNE power8
BR indexbodyp9<>(SB)
#endif
power8:
BR indexbody<>(SB)
// s: string we are searching
// sep: string to search for
// R3=&s[0], R4=len(s)
// R5=&sep[0], R6=len(sep)
// R14=&ret (index where sep found)
// R7=working addr of string
// R16=index value 16
// R17=index value 17
// R18=index value 18
// R19=index value 1
// R26=LASTBYTE of string
// R27=LASTSTR last start byte to compare with sep
// R8, R9 scratch
// V0=sep left justified zero fill
// CR4=sep length >= 16
#define SEPMASK V17
#define LASTBYTE R26
#define LASTSTR R27
#define ONES V20
#define SWAP V21
#define SWAP_ VS53
// POWER8 search body. The separator is loaded once into V0 (masked to
// its length via SEPMASK); separators of 2, 3 and 4 bytes get
// dedicated splat-and-compare loops, 5..16 bytes use a generic
// masked-vector compare, and 17..32 bytes compare two vectors per
// candidate position. Loads go through VLOADSWAP so the comparisons
// run in big-endian order on both byte orders.
TEXT indexbody<>(SB), NOSPLIT|NOFRAME, $0
CMP R6, R4 // Compare lengths
BGT notfound // If sep len is > string, notfound
ADD R4, R3, LASTBYTE // find last byte addr
SUB R6, LASTBYTE, LASTSTR // LAST=&s[len(s)-len(sep)] (last valid start index)
CMP R6, $0 // Check sep len
BEQ notfound // sep len 0 -- not found
MOVD R3, R7 // Copy of string addr
MOVD $16, R16 // Index value 16
MOVD $17, R17 // Index value 17
MOVD $18, R18 // Index value 18
MOVD $1, R19 // Index value 1
MOVD $byteswap<>+00(SB), R8
VSPLTISB $0xFF, ONES // splat all 1s
LXVD2X (R8)(R0), SWAP_ // Set up swap string
CMP R6, $16, CR4 // CR4 for len(sep) >= 16
VOR ONES, ONES, SEPMASK // Set up full SEPMASK
BGE CR4, loadge16 // Load for len(sep) >= 16
SUB R6, R16, R9 // 16-len of sep
SLD $3, R9 // Set up for VSLO
MTVSRD R9, V9 // Set up for VSLO
VSLDOI $8, V9, V9, V9 // Set up for VSLO
VSLO ONES, V9, SEPMASK // Mask for separator len(sep) < 16
loadge16:
ANDCC $15, R5, R9 // Find byte offset of sep
ADD R9, R6, R10 // Add sep len
CMP R10, $16 // Check if sep len+offset > 16
BGT sepcross16 // Sep crosses 16 byte boundary
RLDICR $0, R5, $59, R8 // Adjust addr to 16 byte container
VLOADSWAP(R8, R0, V0, V0) // Load 16 bytes @R8 into V0
SLD $3, R9 // Set up shift count for VSLO
MTVSRD R9, V8 // Set up shift count for VSLO
VSLDOI $8, V8, V8, V8
VSLO V0, V8, V0 // Shift by start byte
VAND V0, SEPMASK, V0 // Mask separator (< 16)
BR index2plus
sepcross16:
VLOADSWAP(R5, R0, V0, V0) // Load 16 bytes @R5 into V0
VAND V0, SEPMASK, V0 // mask out separator
BLE CR4, index2to16
BR index17plus // Handle sep > 16
index2plus:
CMP R6, $2 // Check length of sep
BNE index3plus // If not 2, check for 3
ADD $16, R7, R9 // Check if next 16 bytes past last
CMP R9, LASTBYTE // compare with last
BGE index2to16 // 2 <= len(string) <= 16
MOVD $0xff00, R21 // Mask for later
MTVSRD R21, V25 // Move to Vreg
VSPLTH $3, V25, V31 // Splat mask
VSPLTH $0, V0, V1 // Splat 1st 2 bytes of sep
VSPLTISB $0, V10 // Clear V10
// First case: 2 byte separator
// V1: 2 byte separator splatted
// V2: 16 bytes at addr
// V4: 16 bytes at addr+1
// Compare 2 byte separator at start
// and at start+1. Use VSEL to combine
// those results to find the first
// matching start byte, returning
// that value when found. Loop as
// long as len(string) > 16
index2loop2:
VLOADSWAP(R7, R19, V3, V3) // Load 16 bytes @R7+1 into V3
index2loop:
VLOADSWAP(R7, R0, V2, V2) // Load 16 bytes @R7 into V2
VCMPEQUH V1, V2, V5 // Search for sep
VCMPEQUH V1, V3, V6 // Search for sep offset by 1
VSEL V6, V5, V31, V7 // merge even and odd indices
VCLZD V7, V18 // find index of first match
MFVSRD V18, R25 // get first value
CMP R25, $64 // Found if < 64
BLT foundR25 // Return byte index where found
VSLDOI $8, V18, V18, V18 // Adjust 2nd value
MFVSRD V18, R25 // get second value
CMP R25, $64 // Found if < 64
ADD $64, R25 // Update byte offset
BLT foundR25 // Return value
ADD $16, R7 // R7+=16 Update string pointer
ADD $17, R7, R9 // R9=F7+17 since loop unrolled
CMP R9, LASTBYTE // Compare addr+17 against last byte
BLT index2loop2 // If < last, continue loop
CMP R7, LASTBYTE // Compare addr+16 against last byte
BLT index2to16 // If < 16 handle specially
VLOADSWAP(R7, R0, V3, V3) // Load 16 bytes @R7 into V3
VSLDOI $1, V3, V10, V3 // Shift left by 1 byte
BR index2loop
index3plus:
CMP R6, $3 // Check if sep == 3
BNE index4plus // If not check larger
ADD $19, R7, R9 // Find bytes for use in this loop
CMP R9, LASTBYTE // Compare against last byte
BGE index2to16 // Remaining string 2<=len<=16
MOVD $0xff00, R21 // Set up mask for upcoming loop
MTVSRD R21, V25 // Move mask to Vreg
VSPLTH $3, V25, V31 // Splat mask
VSPLTH $0, V0, V1 // Splat 1st two bytes of sep
VSPLTB $2, V0, V8 // Splat 3rd byte of sep
// Loop to process 3 byte separator.
// string[0:16] is in V2
// string[2:18] is in V3
// sep[0:2] splatted in V1
// sec[3] splatted in v8
// Load vectors at string, string+1
// and string+2. Compare string, string+1
// against first 2 bytes of separator
// splatted, and string+2 against 3rd
// byte splatted. Merge the results with
// VSEL to find the first byte of a match.
// Special handling for last 16 bytes if the
// string fits in 16 byte multiple.
index3loop2:
MOVD $2, R21 // Set up index for 2
VSPLTISB $0, V10 // Clear V10
VLOADSWAP(R7, R21, V3, V3)// Load 16 bytes @R7+2 into V3
VSLDOI $14, V3, V10, V3 // Left justify next 2 bytes
index3loop:
VLOADSWAP(R7, R0, V2, V2) // Load with correct order
VSLDOI $1, V2, V3, V4 // string[1:17]
VSLDOI $2, V2, V3, V9 // string[2:18]
VCMPEQUH V1, V2, V5 // compare hw even indices
VCMPEQUH V1, V4, V6 // compare hw odd indices
VCMPEQUB V8, V9, V10 // compare 3rd to last byte
VSEL V6, V5, V31, V7 // Find 1st matching byte using mask
VAND V7, V10, V7 // AND matched bytes with matched 3rd byte
VCLZD V7, V18 // Find first nonzero indexes
MFVSRD V18, R25 // Move 1st doubleword
CMP R25, $64 // If < 64 found
BLT foundR25 // Return matching index
VSLDOI $8, V18, V18, V18 // Move value
MFVSRD V18, R25 // Move 2nd doubleword
CMP R25, $64 // If < 64 found
ADD $64, R25 // Update byte index
BLT foundR25 // Return matching index
ADD $16, R7 // R7+=16 string ptr
ADD $19, R7, R9 // Number of string bytes for loop
CMP R9, LASTBYTE // Compare against last byte of string
BLT index3loop2 // If within, continue this loop
CMP R7, LASTSTR // Compare against last start byte
BLT index2to16 // Process remainder
VSPLTISB $0, V3 // Special case for last 16 bytes
BR index3loop // Continue this loop
// Loop to process 4 byte separator
// string[0:16] in V2
// string[3:16] in V3
// sep[0:4] splatted in V1
// Set up vectors with strings at offsets
// 0, 1, 2, 3 and compare against the 4 byte
// separator also splatted. Use VSEL with the
// compare results to find the first byte where
// a separator match is found.
index4plus:
CMP R6, $4 // Check if 4 byte separator
BNE index5plus // If not next higher
ADD $20, R7, R9 // Check string size to load
CMP R9, LASTBYTE // Verify string length
BGE index2to16 // If not large enough, process remaining
MOVD $2, R15 // Set up index
// Set up masks for use with VSEL
MOVD $0xff, R21 // Set up mask 0xff000000ff000000...
SLD $24, R21
MTVSRD R21, V10
VSPLTW $1, V10, V29
VSLDOI $2, V29, V29, V30 // Mask 0x0000ff000000ff00...
MOVD $0xffff, R21
SLD $16, R21
MTVSRD R21, V10
VSPLTW $1, V10, V31 // Mask 0xffff0000ffff0000...
VSPLTW $0, V0, V1 // Splat 1st word of separator
index4loop:
VLOADSWAP(R7, R0, V2, V2) // Load 16 bytes @R7 into V2
next4:
VSPLTISB $0, V10 // Clear
MOVD $3, R9 // Number of bytes beyond 16
VLOADSWAP(R7, R9, V3, V3) // Load 16 bytes @R7+3 into V3
VSLDOI $13, V3, V10, V3 // Shift left last 3 bytes
VSLDOI $1, V2, V3, V4 // V4=(V2:V3)<<1
VSLDOI $2, V2, V3, V9 // V9=(V2:V3)<<2
VSLDOI $3, V2, V3, V10 // V10=(V2:v3)<<3
VCMPEQUW V1, V2, V5 // compare index 0, 4, ... with sep
VCMPEQUW V1, V4, V6 // compare index 1, 5, ... with sep
VCMPEQUW V1, V9, V11 // compare index 2, 6, ... with sep
VCMPEQUW V1, V10, V12 // compare index 3, 7, ... with sep
VSEL V6, V5, V29, V13 // merge index 0, 1, 4, 5, using mask
VSEL V12, V11, V30, V14 // merge index 2, 3, 6, 7, using mask
VSEL V14, V13, V31, V7 // final merge
VCLZD V7, V18 // Find first index for each half
MFVSRD V18, R25 // Isolate value
CMP R25, $64 // If < 64, found
BLT foundR25 // Return found index
VSLDOI $8, V18, V18, V18 // Move for MFVSRD
MFVSRD V18, R25 // Isolate other value
CMP R25, $64 // If < 64, found
ADD $64, R25 // Update index for high doubleword
BLT foundR25 // Return found index
ADD $16, R7 // R7+=16 for next string
ADD $20, R7, R9 // R+20 for all bytes to load
CMP R9, LASTBYTE // Past end? Maybe check for extra?
BLT index4loop // If not, continue loop
CMP R7, LASTSTR // Check remainder
BLE index2to16 // Process remainder
BR notfound // Not found
index5plus:
CMP R6, $16 // Check for sep > 16
BGT index17plus // Handle large sep
// Assumption is that the separator is smaller than the string at this point
index2to16:
CMP R7, LASTSTR // Compare last start byte
BGT notfound // last takes len(sep) into account
ADD $16, R7, R9 // Check for last byte of string
CMP R9, LASTBYTE
BGT index2to16tail
// At least 16 bytes of string left
// Mask the number of bytes in sep
index2to16loop:
VLOADSWAP(R7, R0, V1, V1) // Load 16 bytes @R7 into V1
compare:
VAND V1, SEPMASK, V2 // Mask out sep size
VCMPEQUBCC V0, V2, V3 // Compare masked string
BLT CR6, found // All equal
ADD $1, R7 // Update ptr to next byte
CMP R7, LASTSTR // Still less than last start byte
BGT notfound // Not found
ADD $16, R7, R9 // Verify remaining bytes
CMP R9, LASTBYTE // At least 16
BLT index2to16loop // Try again
// Less than 16 bytes remaining in string
// Separator >= 2
index2to16tail:
ADD R3, R4, R9 // End of string
SUB R7, R9, R9 // Number of bytes left
ANDCC $15, R7, R10 // 16 byte offset
ADD R10, R9, R11 // offset + len
CMP R11, $16 // >= 16?
BLE short // Does not cross 16 bytes
VLOADSWAP(R7, R0, V1, V1) // Load 16 bytes @R7 into V1
BR index2to16next // Continue on
short:
RLDICR $0, R7, $59, R9 // Adjust addr to 16 byte container
VLOADSWAP(R9, R0, V1, V1)// Load 16 bytes @R9 into V1
SLD $3, R10 // Set up shift
MTVSRD R10, V8 // Set up shift
VSLDOI $8, V8, V8, V8
VSLO V1, V8, V1 // Shift by start byte
VSPLTISB $0, V25 // Clear for later use
index2to16next:
VAND V1, SEPMASK, V2 // Just compare size of sep
VCMPEQUBCC V0, V2, V3 // Compare sep and partial string
BLT CR6, found // Found
ADD $1, R7 // Not found, try next partial string
CMP R7, LASTSTR // Check for end of string
BGT notfound // If at end, then not found
VSLDOI $1, V1, V25, V1 // Shift string left by 1 byte
BR index2to16next // Check the next partial string
index17plus:
CMP R6, $32 // Check if 17 < len(sep) <= 32
BGT index33plus
SUB $16, R6, R9 // Extra > 16
SLD $56, R9, R10 // Shift to use in VSLO
MTVSRD R10, V9 // Set up for VSLO
VLOADSWAP(R5, R9, V1, V1)// Load 16 bytes @R5+R9 into V1
VSLO V1, V9, V1 // Shift left
VSPLTISB $0xff, V7 // Splat 1s
VSPLTISB $0, V27 // Splat 0
index17to32loop:
VLOADSWAP(R7, R0, V2, V2) // Load 16 bytes @R7 into V2
next17:
VLOADSWAP(R7, R9, V3, V3) // Load 16 bytes @R7+R9 into V3
VSLO V3, V9, V3 // Shift left
VCMPEQUB V0, V2, V4 // Compare first 16 bytes
VCMPEQUB V1, V3, V5 // Compare extra over 16 bytes
VAND V4, V5, V6 // Check if both equal
VCMPEQUBCC V6, V7, V8 // All equal?
BLT CR6, found // Yes
ADD $1, R7 // On to next byte
CMP R7, LASTSTR // Check if last start byte
BGT notfound // If too high, not found
BR index17to32loop // Continue
notfound:
MOVD $-1, R3 // Return -1 if not found
RET
index33plus:
MOVD $0, (R0) // Case not implemented
RET // Crash before return
foundR25:
SRD $3, R25 // Convert from bits to bytes
ADD R25, R7 // Add to current string address
SUB R3, R7 // Subtract from start of string
MOVD R7, R3 // Return byte where found
RET
found:
SUB R3, R7 // Return byte where found
MOVD R7, R3
RET
// indexbodyp9<>: inner body of Index for POWER9+.
// Register contract (established by the uses below):
//   R3 = string address, R4 = string length
//   R5 = separator address, R6 = separator length
// On exit R3 holds the byte index of the first match, or -1 if not found.
// LASTBYTE/LASTSTR/ONES/SEPMASK are register aliases #defined earlier in this file.
TEXT indexbodyp9<>(SB), NOSPLIT|NOFRAME, $0
	CMP	R6, R4		// Compare lengths
	BGT	notfound	// If sep len is > string, notfound
	ADD	R4, R3, LASTBYTE	// find last byte addr
	SUB	R6, LASTBYTE, LASTSTR	// LAST=&s[len(s)-len(sep)] (last valid start index)
	CMP	R6, $0		// Check sep len
	BEQ	notfound	// sep len 0 -- not found
	MOVD	R3, R7		// Copy of string addr
#ifndef GOPPC64_power10
	MOVD	$16, R16	// Index value 16
	MOVD	$17, R17	// Index value 17
	MOVD	$18, R18	// Index value 18
	VSPLTISB	$0xFF, ONES	// splat all 1s
	VOR	ONES, ONES, SEPMASK	// Set up full SEPMASK
#else
	SLD	$56, R6, R14	// Set up separator length for LXVLL
#endif
	MOVD	$1, R19		// Index value 1
	CMP	R6, $16, CR4	// CR4 for len(sep) >= 16
	BGE	CR4, loadge16	// Load for len(sep) >= 16
#ifndef GOPPC64_power10
	SUB	R6, R16, R9	// 16-len of sep
	SLD	$3, R9		// Set up for VSLO
	MTVSRD	R9, V9		// Set up for VSLO
	VSLDOI	$8, V9, V9, V9	// Set up for VSLO
	VSLO	ONES, V9, SEPMASK	// Mask for separator len(sep) < 16
#endif
loadge16:
	ANDCC	$15, R5, R9	// Find byte offset of sep
	ADD	R9, R6, R10	// Add sep len
	CMP	R10, $16	// Check if sep len+offset > 16
	BGT	sepcross16	// Sep crosses 16 byte boundary
#ifdef GOPPC64_power10
	LXVLL	R5, R14, V0	// Load separator
#else
	RLDICR	$0, R5, $59, R8	// Adjust addr to 16 byte container
	LXVB16X	(R8)(R0), V0	// Load 16 bytes @R8 into V0
	SLD	$3, R9		// Set up shift count for VSLO
	MTVSRD	R9, V8		// Set up shift count for VSLO
	VSLDOI	$8, V8, V8, V8
	VSLO	V0, V8, V0	// Shift by start byte
	VAND	V0, SEPMASK, V0	// Mask separator (< 16)
#endif
	BR	index2plus
sepcross16:
#ifdef GOPPC64_power10
	LXVLL	R5, R14, V0	// Load separator
#else
	LXVB16X	(R5)(R0), V0	// Load 16 bytes @R5 into V0
	VAND	V0, SEPMASK, V0	// mask out separator
#endif
	BLE	CR4, index2to16
	BR	index17plus	// Handle sep > 16
index2plus:
	CMP	R6, $2		// Check length of sep
	BNE	index3plus	// If not 2, check for 3
	ADD	$16, R7, R9	// Check if next 16 bytes past last
	CMP	R9, LASTBYTE	// compare with last
	BGE	index2to16	// 2 <= len(string) <= 16
	MOVD	$0xff00, R21	// Mask for later
	MTVSRD	R21, V25	// Move to Vreg
	VSPLTH	$3, V25, V31	// Splat mask
	VSPLTH	$0, V0, V1	// Splat 1st 2 bytes of sep
	VSPLTISB	$0, V10	// Clear V10

	// First case: 2 byte separator
	// V1: 2 byte separator splatted
	// V2: 16 bytes at addr
	// V4: 16 bytes at addr+1
	// Compare 2 byte separator at start
	// and at start+1. Use VSEL to combine
	// those results to find the first
	// matching start byte, returning
	// that value when found. Loop as
	// long as len(string) > 16
index2loop2:
	LXVB16X	(R7)(R19), V3	// Load 16 bytes @R7+1 into V3
index2loop:
	LXVB16X	(R7)(R0), V2	// Load 16 bytes @R7 into V2
	VCMPEQUH	V1, V2, V5	// Search for sep
	VCMPEQUH	V1, V3, V6	// Search for sep offset by 1
	VSEL	V6, V5, V31, V7	// merge even and odd indices
	VCLZD	V7, V18		// find index of first match
	MFVSRD	V18, R25	// get first value
	CMP	R25, $64	// Found if < 64
	BLT	foundR25	// Return byte index where found
	MFVSRLD	V18, R25	// get second value
	CMP	R25, $64	// Found if < 64
	ADD	$64, R25	// Update byte offset
	BLT	foundR25	// Return value
	ADD	$16, R7		// R7+=16 Update string pointer
	ADD	$17, R7, R9	// R9=R7+17 since loop unrolled
	CMP	R9, LASTBYTE	// Compare addr+17 against last byte
	BLT	index2loop2	// If < last, continue loop
	CMP	R7, LASTBYTE	// Compare addr+16 against last byte
	BLT	index2to16	// If < 16 handle specially
	LXVB16X	(R7)(R0), V3	// Load 16 bytes @R7 into V3
	VSLDOI	$1, V3, V10, V3	// Shift left by 1 byte
	BR	index2loop
index3plus:
	CMP	R6, $3		// Check if sep == 3
	BNE	index4plus	// If not check larger
	ADD	$19, R7, R9	// Find bytes for use in this loop
	CMP	R9, LASTBYTE	// Compare against last byte
	BGE	index2to16	// Remaining string 2<=len<=16
	MOVD	$0xff00, R21	// Set up mask for upcoming loop
	MTVSRD	R21, V25	// Move mask to Vreg
	VSPLTH	$3, V25, V31	// Splat mask
	VSPLTH	$0, V0, V1	// Splat 1st two bytes of sep
	VSPLTB	$2, V0, V8	// Splat 3rd byte of sep

	// Loop to process 3 byte separator.
	// string[0:16] is in V2
	// string[2:18] is in V3
	// sep[0:2] splatted in V1
	// sep[3] splatted in V8
	// Load vectors at string, string+1
	// and string+2. Compare string, string+1
	// against first 2 bytes of separator
	// splatted, and string+2 against 3rd
	// byte splatted. Merge the results with
	// VSEL to find the first byte of a match.
	// Special handling for last 16 bytes if the
	// string fits in 16 byte multiple.
index3loop2:
	MOVD	$2, R21		// Set up index for 2
	VSPLTISB	$0, V10	// Clear V10
	LXVB16X	(R7)(R21), V3	// Load 16 bytes @R7+2 into V3
	VSLDOI	$14, V3, V10, V3	// Left justify next 2 bytes
index3loop:
	LXVB16X	(R7)(R0), V2	// Load 16 bytes @R7
	VSLDOI	$1, V2, V3, V4	// string[1:17]
	VSLDOI	$2, V2, V3, V9	// string[2:18]
	VCMPEQUH	V1, V2, V5	// compare hw even indices
	VCMPEQUH	V1, V4, V6	// compare hw odd indices
	VCMPEQUB	V8, V9, V10	// compare 3rd to last byte
	VSEL	V6, V5, V31, V7	// Find 1st matching byte using mask
	VAND	V7, V10, V7	// AND matched bytes with matched 3rd byte
	VCLZD	V7, V18		// Find first nonzero indexes
	MFVSRD	V18, R25	// Move 1st doubleword
	CMP	R25, $64	// If < 64 found
	BLT	foundR25	// Return matching index
	MFVSRLD	V18, R25	// Move 2nd doubleword
	CMP	R25, $64	// If < 64 found
	ADD	$64, R25	// Update byte index
	BLT	foundR25	// Return matching index
	ADD	$16, R7		// R7+=16 string ptr
	ADD	$19, R7, R9	// Number of string bytes for loop
	CMP	R9, LASTBYTE	// Compare against last byte of string
	BLT	index3loop2	// If within, continue this loop
	CMP	R7, LASTSTR	// Compare against last start byte
	BLT	index2to16	// Process remainder
	VSPLTISB	$0, V3	// Special case for last 16 bytes
	BR	index3loop	// Continue this loop

	// Loop to process 4 byte separator
	// string[0:16] in V2
	// string[3:16] in V3
	// sep[0:4] splatted in V1
	// Set up vectors with strings at offsets
	// 0, 1, 2, 3 and compare against the 4 byte
	// separator also splatted. Use VSEL with the
	// compare results to find the first byte where
	// a separator match is found.
index4plus:
	CMP	R6, $4		// Check if 4 byte separator
	BNE	index5plus	// If not next higher
	ADD	$20, R7, R9	// Check string size to load
	CMP	R9, LASTBYTE	// Verify string length
	BGE	index2to16	// If not large enough, process remaining

	// Set up masks for use with VSEL
	MOVD	$0xff, R21	// Set up mask 0xff000000ff000000...
	SLD	$24, R21
	MTVSRWS	R21, V29
	VSLDOI	$2, V29, V29, V30	// Mask 0x0000ff000000ff00...
	MOVD	$0xffff, R21
	SLD	$16, R21
	MTVSRWS	R21, V31
	VSPLTW	$0, V0, V1	// Splat 1st word of separator
index4loop:
	LXVB16X	(R7)(R0), V2	// Load 16 bytes @R7 into V2
next4:
	VSPLTISB	$0, V10	// Clear
	MOVD	$3, R9		// Number of bytes beyond 16
	LXVB16X	(R7)(R9), V3	// Load 16 bytes @R7 into V3
	VSLDOI	$13, V3, V10, V3	// Shift left last 3 bytes
	VSLDOI	$1, V2, V3, V4	// V4=(V2:V3)<<1
	VSLDOI	$2, V2, V3, V9	// V9=(V2:V3)<<2
	VSLDOI	$3, V2, V3, V10	// V10=(V2:v3)<<3
	VCMPEQUW	V1, V2, V5	// compare index 0, 4, ... with sep
	VCMPEQUW	V1, V4, V6	// compare index 1, 5, ... with sep
	VCMPEQUW	V1, V9, V11	// compare index 2, 6, ... with sep
	VCMPEQUW	V1, V10, V12	// compare index 3, 7, ... with sep
	VSEL	V6, V5, V29, V13	// merge index 0, 1, 4, 5, using mask
	VSEL	V12, V11, V30, V14	// merge index 2, 3, 6, 7, using mask
	VSEL	V14, V13, V31, V7	// final merge
	VCLZD	V7, V18		// Find first index for each half
	MFVSRD	V18, R25	// Isolate value
	CMP	R25, $64	// If < 64, found
	BLT	foundR25	// Return found index
	MFVSRLD	V18, R25	// Isolate other value
	CMP	R25, $64	// If < 64, found
	ADD	$64, R25	// Update index for high doubleword
	BLT	foundR25	// Return found index
	ADD	$16, R7		// R7+=16 for next string
	ADD	$20, R7, R9	// R+20 for all bytes to load
	CMP	R9, LASTBYTE	// Past end? Maybe check for extra?
	BLT	index4loop	// If not, continue loop
	CMP	R7, LASTSTR	// Check remainder
	BLE	index2to16	// Process remainder
	BR	notfound	// Not found
index5plus:
	CMP	R6, $16		// Check for sep > 16
	BGT	index17plus	// Handle large sep

	// Assumption is that the separator is smaller than the string at this point
index2to16:
	CMP	R7, LASTSTR	// Compare last start byte
	BGT	notfound	// last takes len(sep) into account
	ADD	$19, R7, R9	// To check 4 indices per iteration, need at least 16+3 bytes
	CMP	R9, LASTBYTE

	// At least 16 bytes of string left
	// Mask the number of bytes in sep
	VSPLTISB	$0, V10	// Clear
	BGT	index2to16tail
#ifdef GOPPC64_power10
	ADD	$3,R7, R17	// Base+3
	ADD	$2,R7, R8	// Base+2
	ADD	$1,R7, R10	// Base+1
#else
	MOVD	$3, R17		// Number of bytes beyond 16
#endif
	PCALIGN	$16
index2to16loop:
#ifdef GOPPC64_power10
	LXVLL	R7, R14, V8	// Load next 16 bytes of string from Base
	LXVLL	R10, R14, V9	// Load next 16 bytes of string from Base+1
	LXVLL	R8, R14, V11	// Load next 16 bytes of string from Base+2
	LXVLL	R17,R14, V12	// Load next 16 bytes of string from Base+3
#else
	LXVB16X	(R7)(R0), V1	// Load next 16 bytes of string into V1 from R7
	LXVB16X	(R7)(R17), V5	// Load next 16 bytes of string into V5 from R7+3
	VSLDOI	$13, V5, V10, V2	// Shift left last 3 bytes
	VSLDOI	$1, V1, V2, V3	// V3=(V1:V2)<<1
	VSLDOI	$2, V1, V2, V4	// V4=(V1:V2)<<2
	VAND	V1, SEPMASK, V8	// Mask out sep size 0th index
	VAND	V3, SEPMASK, V9	// Mask out sep size 1st index
	VAND	V4, SEPMASK, V11	// Mask out sep size 2nd index
	VAND	V5, SEPMASK, V12	// Mask out sep size 3rd index
#endif
	VCMPEQUBCC	V0, V8, V8	// compare masked string
	BLT	CR6, found	// All equal while comparing 0th index
	VCMPEQUBCC	V0, V9, V9	// compare masked string
	BLT	CR6, found2	// All equal while comparing 1st index
	VCMPEQUBCC	V0, V11, V11	// compare masked string
	BLT	CR6, found3	// All equal while comparing 2nd index
	VCMPEQUBCC	V0, V12, V12	// compare masked string
	BLT	CR6, found4	// All equal while comparing 3rd index
	ADD	$4, R7		// Update ptr to next 4 bytes
#ifdef GOPPC64_power10
	ADD	$4, R17		// Update ptr to next 4 bytes
	ADD	$4, R8		// Update ptr to next 4 bytes
	ADD	$4, R10		// Update ptr to next 4 bytes
#endif
	CMP	R7, LASTSTR	// Still less than last start byte
	BGT	notfound	// Not found
	ADD	$19, R7, R9	// Verify remaining bytes
	CMP	R9, LASTBYTE	// length of string at least 19
	BLE	index2to16loop	// Try again, else do post processing and jump to index2to16next
	PCALIGN	$32

	// <19 bytes left, post process the remaining string
index2to16tail:
#ifdef GOPPC64_power10
index2to16next_p10:
	LXVLL	R7,R14, V1	// Load 16 bytes @R7 into V1
	VCMPEQUBCC	V1, V0, V3	// Compare sep and partial string
	BLT	CR6, found	// Found
	ADD	$1, R7		// Not found, try next partial string
	CMP	R7, LASTSTR	// Check for end of string
	BLE	index2to16next_p10	// If at end, then not found
	BR	notfound	// go to remainder loop
#else
	ADD	R3, R4, R9	// End of string
	SUB	R7, R9, R9	// Number of bytes left
	ANDCC	$15, R7, R10	// 16 byte offset
	ADD	R10, R9, R11	// offset + len
	CMP	R11, $16	// >= 16?
	BLE	short		// Does not cross 16 bytes
	LXVB16X	(R7)(R0), V1	// Load 16 bytes @R7 into V1
	CMP	R9, $16		// Post-processing of unrolled loop
	BLE	index2to16next	// continue to index2to16next if <= 16 bytes
	SUB	R16, R9, R10	// R9 should be 18 or 17 hence R10 is 1 or 2
	LXVB16X	(R7)(R10), V9
	CMP	R10, $1		// string length is 17, compare 1 more byte
	BNE	extra2		// string length is 18, compare 2 more bytes
	VSLDOI	$15, V9, V10, V25
	VAND	V1, SEPMASK, V2	// Just compare size of sep
	VCMPEQUBCC	V0, V2, V3	// Compare sep and partial string
	BLT	CR6, found	// Found
	ADD	$1, R7		// Not found, try next partial string
	CMP	R7, LASTSTR	// Check for end of string
	BGT	notfound	// If at end, then not found
	VSLDOI	$1, V1, V25, V1	// Shift string left by 1 byte
	BR	index2to16next	// go to remainder loop
extra2:
	VSLDOI	$14, V9, V10, V25
	VAND	V1, SEPMASK, V2	// Just compare size of sep
	VCMPEQUBCC	V0, V2, V3	// Compare sep and partial string
	BLT	CR6, found	// Found
	ADD	$1, R7		// Not found, try next partial string
	CMP	R7, LASTSTR	// Check for end of string
	BGT	notfound	// If at end, then not found
	VOR	V1, V1, V4	// save remaining string
	VSLDOI	$1, V1, V25, V1	// Shift string left by 1 byte for 17th byte
	VAND	V1, SEPMASK, V2	// Just compare size of sep
	VCMPEQUBCC	V0, V2, V3	// Compare sep and partial string
	BLT	CR6, found	// Found
	ADD	$1, R7		// Not found, try next partial string
	CMP	R7, LASTSTR	// Check for end of string
	BGT	notfound	// If at end, then not found
	VSLDOI	$2, V4, V25, V1	// Shift saved string left by 2 bytes for 18th byte
	BR	index2to16next	// Check the remaining partial string in index2to16next
short:
	RLDICR	$0, R7, $59, R9	// Adjust addr to 16 byte container
	LXVB16X	(R9)(R0), V1	// Load 16 bytes @R9 into V1
	SLD	$3, R10		// Set up shift
	MTVSRD	R10, V8		// Set up shift
	VSLDOI	$8, V8, V8, V8
	VSLO	V1, V8, V1	// Shift by start byte
	PCALIGN	$16
index2to16next:
	VAND	V1, SEPMASK, V2	// Just compare size of sep
	VCMPEQUBCC	V0, V2, V3	// Compare sep and partial string
	BLT	CR6, found	// Found
	ADD	$1, R7		// Not found, try next partial string
	CMP	R7, LASTSTR	// Check for end of string
	BGT	notfound	// If at end, then not found
	VSLDOI	$1, V1, V10, V1	// Shift string left by 1 byte
	BR	index2to16next	// Check the next partial string
#endif // Tail processing if GOPPC64!=power10
index17plus:
	CMP	R6, $32		// Check if 17 < len(sep) <= 32
	BGT	index33plus
	SUB	$16, R6, R9	// Extra > 16
	SLD	$56, R9, R10	// Shift to use in VSLO
	MTVSRD	R10, V9		// Set up for VSLO
	LXVB16X	(R5)(R9), V1	// Load 16 bytes @R5+R9 into V1
	VSLO	V1, V9, V1	// Shift left
	VSPLTISB	$0xff, V7	// Splat 1s
	VSPLTISB	$0, V27	// Splat 0
index17to32loop:
	LXVB16X	(R7)(R0), V2	// Load 16 bytes @R7 into V2
next17:
	LXVB16X	(R7)(R9), V3	// Load 16 bytes @R7+R9 into V3
	VSLO	V3, V9, V3	// Shift left
	VCMPEQUB	V0, V2, V4	// Compare first 16 bytes
	VCMPEQUB	V1, V3, V5	// Compare extra over 16 bytes
	VAND	V4, V5, V6	// Check if both equal
	VCMPEQUBCC	V6, V7, V8	// All equal?
	BLT	CR6, found	// Yes
	ADD	$1, R7		// On to next byte
	CMP	R7, LASTSTR	// Check if last start byte
	BGT	notfound	// If too high, not found
	BR	index17to32loop	// Continue
notfound:
	MOVD	$-1, R3		// Return -1 if not found
	RET
index33plus:
	MOVD	$0, (R0)	// Case not implemented
	RET			// Crash before return
foundR25:
	SRD	$3, R25		// Convert from bits to bytes
	ADD	R25, R7		// Add to current string address
	SUB	R3, R7		// Subtract from start of string
	MOVD	R7, R3		// Return byte where found
	RET
found4:
	ADD	$1, R7		// found from unrolled loop at index 3
found3:
	ADD	$1, R7		// found from unrolled loop at index 2
found2:
	ADD	$1, R7		// found from unrolled loop at index 1
found:				// found at index 0
	SUB	R3, R7		// Return byte where found
	MOVD	R7, R3
	RET

View File

@@ -0,0 +1,31 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package bytealg
import "internal/cpu"
const MaxBruteForce = 64
// init raises MaxLen when the z/Architecture vector facility (VX) is
// available. 64 matches the limit of the assembly Index implementation,
// which crashes deliberately for separators longer than 63 bytes
// (see index65plus in index_s390x.s).
func init() {
	// Note: we're kind of lucky that this flag is available at this point.
	// The runtime sets HasVX when processing auxv records, and that happens
	// to happen *before* running the init functions of packages that
	// the runtime depends on.
	// TODO: it would really be nicer for internal/cpu to figure out this
	// flag by itself. Then we wouldn't need to depend on quirks of
	// early startup initialization order.
	if cpu.S390X.HasVX {
		MaxLen = 64
	}
}
// Cutover reports the number of failures of IndexByte we should tolerate
// before switching over to Index.
// n is the number of bytes processed so far.
// See the bytes.Index implementation for details.
// Cutover reports the number of failures of IndexByte we should tolerate
// before switching over to Index.
// n is the number of bytes processed so far.
// See the bytes.Index implementation for details.
func Cutover(n int) int {
	// Tolerate one error per errPeriod characters, plus a small amount
	// of slop to start.
	const (
		slop      = 16
		errPeriod = 8
	)
	return (n + slop) / errPeriod
}

View File

@@ -0,0 +1,216 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "go_asm.h"
#include "textflag.h"
// Caller must confirm availability of vx facility before calling.
// Index([]byte, []byte) int: unpack the two slice arguments into the
// registers indexbody<> expects and tail-call it.
// Caller must confirm availability of vx facility before calling.
TEXT ·Index(SB),NOSPLIT|NOFRAME,$0-56
	LMG	a_base+0(FP), R1, R2	// R1=&s[0], R2=len(s)
	LMG	b_base+24(FP), R3, R4	// R3=&sep[0], R4=len(sep)
	MOVD	$ret+48(FP), R5		// R5 = address of the result slot
	BR	indexbody<>(SB)
// Caller must confirm availability of vx facility before calling.
// IndexString(string, string) int: same as Index, but string headers are
// 16 bytes apart instead of 24, hence the different FP offsets.
// Caller must confirm availability of vx facility before calling.
TEXT ·IndexString(SB),NOSPLIT|NOFRAME,$0-40
	LMG	a_base+0(FP), R1, R2	// R1=&s[0], R2=len(s)
	LMG	b_base+16(FP), R3, R4	// R3=&sep[0], R4=len(sep)
	MOVD	$ret+32(FP), R5		// R5 = address of the result slot
	BR	indexbody<>(SB)
// s: string we are searching
// sep: string to search for
// R1=&s[0], R2=len(s)
// R3=&sep[0], R4=len(sep)
// R5=&ret (int)
// Caller must confirm availability of vx facility before calling.
// indexbody<>: s390x vector implementation of Index.
// Specialized loops handle sep lengths 2, 3, 4, 5-16, 17-32, 33-48 and
// 49-64; longer separators crash deliberately (index65plus).
// The found index (or -1) is stored through R5.
TEXT indexbody<>(SB),NOSPLIT|NOFRAME,$0
	CMPBGT	R4, R2, notfound
	ADD	R1, R2
	SUB	R4, R2 // R2=&s[len(s)-len(sep)] (last valid index)
	CMPBEQ	R4, $0, notfound
	SUB	$1, R4 // R4=len(sep)-1 for use as VLL index
	VLL	R4, (R3), V0 // contains first 16 bytes of sep
	MOVD	R1, R7
index2plus:
	CMPBNE	R4, $1, index3plus
	MOVD	$15(R7), R9
	CMPBGE	R9, R2, index2to16
	VGBM	$0xaaaa, V31 // 0xff00ff00ff00ff00...
	VONE	V16
	VREPH	$0, V0, V1
	CMPBGE	R9, R2, index2to16 // NOTE(review): duplicates the check three lines up; harmless but likely redundant
index2loop:
	VL	0(R7), V2 // 16 bytes, even indices
	VL	1(R7), V4 // 16 bytes, odd indices
	VCEQH	V1, V2, V5 // compare even indices
	VCEQH	V1, V4, V6 // compare odd indices
	VSEL	V5, V6, V31, V7 // merge even and odd indices
	VFEEBS	V16, V7, V17 // find leftmost index, set condition to 1 if found
	BLT	foundV17
	MOVD	$16(R7), R7 // R7+=16
	ADD	$15, R7, R9
	CMPBLE	R9, R2, index2loop // continue if (R7+15) <= R2 (last index to search)
	CMPBLE	R7, R2, index2to16
	BR	notfound
index3plus:
	CMPBNE	R4, $2, index4plus
	ADD	$15, R7, R9
	CMPBGE	R9, R2, index2to16
	MOVD	$1, R0
	VGBM	$0xaaaa, V31 // 0xff00ff00ff00ff00...
	VONE	V16
	VREPH	$0, V0, V1
	VREPB	$2, V0, V8
index3loop:
	VL	(R7), V2 // load 16-bytes into V2
	VLL	R0, 16(R7), V3 // load 2-bytes into V3
	VSLDB	$1, V2, V3, V4 // V4=(V2:V3)<<1
	VSLDB	$2, V2, V3, V9 // V9=(V2:V3)<<2
	VCEQH	V1, V2, V5 // compare 2-byte even indices
	VCEQH	V1, V4, V6 // compare 2-byte odd indices
	VCEQB	V8, V9, V10 // compare last bytes
	VSEL	V5, V6, V31, V7 // merge even and odd indices
	VN	V7, V10, V7 // AND indices with last byte
	VFEEBS	V16, V7, V17 // find leftmost index, set condition to 1 if found
	BLT	foundV17
	MOVD	$16(R7), R7 // R7+=16
	ADD	$15, R7, R9
	CMPBLE	R9, R2, index3loop // continue if (R7+15) <= R2 (last index to search)
	CMPBLE	R7, R2, index2to16
	BR	notfound
index4plus:
	CMPBNE	R4, $3, index5plus
	ADD	$15, R7, R9
	CMPBGE	R9, R2, index2to16
	MOVD	$2, R0
	VGBM	$0x8888, V29 // 0xff000000ff000000...
	VGBM	$0x2222, V30 // 0x0000ff000000ff00...
	VGBM	$0xcccc, V31 // 0xffff0000ffff0000...
	VONE	V16
	VREPF	$0, V0, V1
index4loop:
	VL	(R7), V2 // load 16-bytes into V2
	VLL	R0, 16(R7), V3 // load 3-bytes into V3
	VSLDB	$1, V2, V3, V4 // V4=(V2:V3)<<1
	VSLDB	$2, V2, V3, V9 // V9=(V2:V3)<<2
	VSLDB	$3, V2, V3, V10 // V10=(V2:V3)<<3
	VCEQF	V1, V2, V5 // compare index 0, 4, ...
	VCEQF	V1, V4, V6 // compare index 1, 5, ...
	VCEQF	V1, V9, V11 // compare index 2, 6, ...
	VCEQF	V1, V10, V12 // compare index 3, 7, ...
	VSEL	V5, V6, V29, V13 // merge index 0, 1, 4, 5, ...
	VSEL	V11, V12, V30, V14 // merge index 2, 3, 6, 7, ...
	VSEL	V13, V14, V31, V7 // final merge
	VFEEBS	V16, V7, V17 // find leftmost index, set condition to 1 if found
	BLT	foundV17
	MOVD	$16(R7), R7 // R7+=16
	ADD	$15, R7, R9
	CMPBLE	R9, R2, index4loop // continue if (R7+15) <= R2 (last index to search)
	CMPBLE	R7, R2, index2to16
	BR	notfound
index5plus:
	CMPBGT	R4, $15, index17plus
index2to16:
	CMPBGT	R7, R2, notfound
	MOVD	$1(R7), R8
	CMPBGT	R8, R2, index2to16tail
index2to16loop:
	// unrolled 2x
	VLL	R4, (R7), V1
	VLL	R4, 1(R7), V2
	VCEQGS	V0, V1, V3
	BEQ	found
	MOVD	$1(R7), R7
	VCEQGS	V0, V2, V4
	BEQ	found
	MOVD	$1(R7), R7
	CMPBLT	R7, R2, index2to16loop
	CMPBGT	R7, R2, notfound
index2to16tail:
	VLL	R4, (R7), V1
	VCEQGS	V0, V1, V2
	BEQ	found
	BR	notfound
index17plus:
	CMPBGT	R4, $31, index33plus
	SUB	$16, R4, R0
	VLL	R0, 16(R3), V1 // second fragment of sep (bytes 16..len-1)
	VONE	V7
index17to32loop:
	VL	(R7), V2
	VLL	R0, 16(R7), V3
	VCEQG	V0, V2, V4
	VCEQG	V1, V3, V5
	VN	V4, V5, V6
	VCEQGS	V6, V7, V8
	BEQ	found
	MOVD	$1(R7), R7
	CMPBLE	R7, R2, index17to32loop
	BR	notfound
index33plus:
	CMPBGT	R4, $47, index49plus
	SUB	$32, R4, R0
	VL	16(R3), V1
	VLL	R0, 32(R3), V2
	VONE	V11
index33to48loop:
	VL	(R7), V3
	VL	16(R7), V4
	VLL	R0, 32(R7), V5
	VCEQG	V0, V3, V6
	VCEQG	V1, V4, V7
	VCEQG	V2, V5, V8
	VN	V6, V7, V9
	VN	V8, V9, V10
	VCEQGS	V10, V11, V12
	BEQ	found
	MOVD	$1(R7), R7
	CMPBLE	R7, R2, index33to48loop
	BR	notfound
index49plus:
	CMPBGT	R4, $63, index65plus
	SUB	$48, R4, R0
	VL	16(R3), V1
	VL	32(R3), V2
	VLL	R0, 48(R3), V3
	VONE	V15
index49to64loop:
	VL	(R7), V4
	VL	16(R7), V5
	VL	32(R7), V6
	VLL	R0, 48(R7), V7
	VCEQG	V0, V4, V8
	VCEQG	V1, V5, V9
	VCEQG	V2, V6, V10
	VCEQG	V3, V7, V11
	VN	V8, V9, V12
	VN	V10, V11, V13
	VN	V12, V13, V14
	VCEQGS	V14, V15, V16
	BEQ	found
	MOVD	$1(R7), R7
	CMPBLE	R7, R2, index49to64loop
notfound:
	MOVD	$-1, (R5)
	RET
index65plus:
	// not implemented: deliberate fault keeps unsupported lengths from
	// silently returning a wrong answer (MaxLen caps callers at 64).
	MOVD	$0, (R0)
	RET
foundV17: // index is in doubleword V17[0]
	VLGVG	$0, V17, R8
	ADD	R8, R7
found:
	SUB	R1, R7
	MOVD	R7, (R5)
	RET

View File

@@ -0,0 +1,34 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "go_asm.h"
#include "textflag.h"
// IndexByte([]byte, byte) int for 386, using REPN SCASB to scan
// CX bytes at DI for the byte in AL.
TEXT ·IndexByte(SB),NOSPLIT,$0-20
	MOVL	b_base+0(FP), SI
	MOVL	b_len+4(FP), CX
	MOVB	c+12(FP), AL
	MOVL	SI, DI
	CLD; REPN; SCASB	// scan forward until AL matches or CX is exhausted
	JZ	3(PC)		// ZF set => match; skip the not-found return
	MOVL	$-1, ret+16(FP)
	RET
	SUBL	SI, DI		// DI stopped one past the match
	SUBL	$1, DI
	MOVL	DI, ret+16(FP)
	RET
// IndexByteString(string, byte) int for 386; identical scan to
// IndexByte but with string-header argument offsets.
TEXT ·IndexByteString(SB),NOSPLIT,$0-16
	MOVL	s_base+0(FP), SI
	MOVL	s_len+4(FP), CX
	MOVB	c+8(FP), AL
	MOVL	SI, DI
	CLD; REPN; SCASB	// scan forward until AL matches or CX is exhausted
	JZ	3(PC)		// ZF set => match; skip the not-found return
	MOVL	$-1, ret+12(FP)
	RET
	SUBL	SI, DI		// DI stopped one past the match
	SUBL	$1, DI
	MOVL	DI, ret+12(FP)
	RET

View File

@@ -0,0 +1,154 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build !plan9
#include "go_asm.h"
#include "textflag.h"
// IndexByte([]byte, byte) int for amd64: load args and tail-jump to the
// shared body.
TEXT ·IndexByte(SB), NOSPLIT, $0-40
	MOVQ b_base+0(FP), SI
	MOVQ b_len+8(FP), BX
	MOVB c+24(FP), AL
	LEAQ ret+32(FP), R8
	JMP  indexbytebody<>(SB)
// IndexByteString(string, byte) int for amd64: same body, string-header
// argument offsets.
TEXT ·IndexByteString(SB), NOSPLIT, $0-32
	MOVQ s_base+0(FP), SI
	MOVQ s_len+8(FP), BX
	MOVB c+16(FP), AL
	LEAQ ret+24(FP), R8
	JMP  indexbytebody<>(SB)
// input:
// SI: data
// BX: data len
// AL: byte sought
// R8: address to put result
// input:
//   SI: data
//   BX: data len
//   AL: byte sought
//   R8: address to put result
// Strategy: <16 bytes uses a single (possibly page-safe) SSE compare;
// 16-32 bytes uses the SSE loop; >32 bytes uses AVX2 when available.
TEXT indexbytebody<>(SB), NOSPLIT, $0
	// Shuffle X0 around so that each byte contains
	// the character we're looking for.
	MOVD AX, X0
	PUNPCKLBW X0, X0
	PUNPCKLBW X0, X0
	PSHUFL $0, X0, X0

	CMPQ BX, $16
	JLT small

	MOVQ SI, DI

	CMPQ BX, $32
	JA avx2
sse:
	LEAQ	-16(SI)(BX*1), AX	// AX = address of last 16 bytes
	JMP	sseloopentry

	PCALIGN $16
sseloop:
	// Move the next 16-byte chunk of the data into X1.
	MOVOU	(DI), X1
	// Compare bytes in X0 to X1.
	PCMPEQB	X0, X1
	// Take the top bit of each byte in X1 and put the result in DX.
	PMOVMSKB X1, DX
	// Find first set bit, if any.
	BSFL	DX, DX
	JNZ	ssesuccess
	// Advance to next block.
	ADDQ	$16, DI
sseloopentry:
	CMPQ	DI, AX
	JB	sseloop

	// Search the last 16-byte chunk. This chunk may overlap with the
	// chunks we've already searched, but that's ok.
	MOVQ	AX, DI
	MOVOU	(AX), X1
	PCMPEQB	X0, X1
	PMOVMSKB X1, DX
	BSFL	DX, DX
	JNZ	ssesuccess

failure:
	MOVQ $-1, (R8)
	RET

// We've found a chunk containing the byte.
// The chunk was loaded from DI.
// The index of the matching byte in the chunk is DX.
// The start of the data is SI.
ssesuccess:
	SUBQ SI, DI	// Compute offset of chunk within data.
	ADDQ DX, DI	// Add offset of byte within chunk.
	MOVQ DI, (R8)
	RET

// handle for lengths < 16
small:
	TESTQ	BX, BX
	JEQ	failure

	// Check if we'll load across a page boundary.
	LEAQ	16(SI), AX
	TESTW	$0xff0, AX	// low 12 bits all zero => load would touch the next page
	JEQ	endofpage

	MOVOU	(SI), X1 // Load data
	PCMPEQB	X0, X1	// Compare target byte with each byte in data.
	PMOVMSKB X1, DX	// Move result bits to integer register.
	BSFL	DX, DX	// Find first set bit.
	JZ	failure	// No set bit, failure.
	CMPL	DX, BX
	JAE	failure	// Match is past end of data.
	MOVQ	DX, (R8)
	RET

endofpage:
	MOVOU	-16(SI)(BX*1), X1	// Load data into the high end of X1.
	PCMPEQB	X0, X1	// Compare target byte with each byte in data.
	PMOVMSKB X1, DX	// Move result bits to integer register.
	MOVL	BX, CX
	SHLL	CX, DX
	SHRL	$16, DX	// Shift desired bits down to bottom of register.
	BSFL	DX, DX	// Find first set bit.
	JZ	failure	// No set bit, failure.
	MOVQ	DX, (R8)
	RET

avx2:
#ifndef hasAVX2
	// NOTE(review): symbol rendered as "internalcpu·X86" here — upstream
	// uses internal∕cpu·X86 (U+2215 package separator); verify against
	// the original source if this fails to assemble.
	CMPB   internalcpu·X86+const_offsetX86HasAVX2(SB), $1
	JNE sse
#endif
	MOVD AX, X0
	LEAQ -32(SI)(BX*1), R11
	VPBROADCASTB  X0, Y1

	PCALIGN $32
avx2_loop:
	VMOVDQU (DI), Y2
	VPCMPEQB Y1, Y2, Y3
	VPTEST Y3, Y3
	JNZ avx2success
	ADDQ $32, DI
	CMPQ DI, R11
	JLT avx2_loop
	MOVQ R11, DI
	VMOVDQU (DI), Y2	// final (possibly overlapping) 32-byte chunk
	VPCMPEQB Y1, Y2, Y3
	VPTEST Y3, Y3
	JNZ avx2success
	VZEROUPPER
	MOVQ $-1, (R8)
	RET

avx2success:
	VPMOVMSKB Y3, DX
	BSFL DX, DX
	SUBQ SI, DI
	ADDQ DI, DX
	MOVQ DX, (R8)
	VZEROUPPER
	RET

View File

@@ -0,0 +1,46 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "go_asm.h"
#include "textflag.h"
// IndexByte([]byte, byte) int for arm: load args and branch to the
// shared body.
TEXT ·IndexByte(SB),NOSPLIT,$0-20
	MOVW	b_base+0(FP), R0
	MOVW	b_len+4(FP), R1
	MOVBU	c+12(FP), R2	// byte to find
	MOVW	$ret+16(FP), R5
	B	indexbytebody<>(SB)
// IndexByteString(string, byte) int for arm: same body, string-header
// argument offsets.
TEXT ·IndexByteString(SB),NOSPLIT,$0-16
	MOVW	s_base+0(FP), R0
	MOVW	s_len+4(FP), R1
	MOVBU	c+8(FP), R2	// byte to find
	MOVW	$ret+12(FP), R5
	B	indexbytebody<>(SB)
// input:
// R0: data
// R1: data length
// R2: byte to find
// R5: address to put result
// input:
//   R0: data
//   R1: data length
//   R2: byte to find
//   R5: address to put result
// Simple byte-at-a-time scan using post-increment loads.
TEXT indexbytebody<>(SB),NOSPLIT,$0-0
	MOVW	R0, R4		// store base for later
	ADD	R0, R1		// end
loop:
	CMP	R0, R1
	B.EQ	notfound
	MOVBU.P	1(R0), R3	// load byte, then R0++
	CMP	R2, R3
	B.NE	loop

	SUB	$1, R0		// R0 will be one beyond the position we want
	SUB	R4, R0		// remove base
	MOVW	R0, (R5)
	RET

notfound:
	MOVW	$-1, R0
	MOVW	R0, (R5)
	RET

View File

@@ -0,0 +1,126 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "textflag.h"
// IndexByte([]byte, byte) int for arm64: load args and branch to the
// shared body.
TEXT ·IndexByte(SB),NOSPLIT,$0-40
	MOVD	b_base+0(FP), R0
	MOVD	b_len+8(FP), R2
	MOVBU	c+24(FP), R1
	MOVD	$ret+32(FP), R8
	B	indexbytebody<>(SB)
// IndexByteString(string, byte) int for arm64: same body, string-header
// argument offsets.
TEXT ·IndexByteString(SB),NOSPLIT,$0-32
	MOVD	s_base+0(FP), R0
	MOVD	s_len+8(FP), R2
	MOVBU	c+16(FP), R1
	MOVD	$ret+24(FP), R8
	B	indexbytebody<>(SB)
// input:
// R0: data
// R1: byte to search
// R2: data len
// R8: address to put result
// input:
//   R0: data
//   R1: byte to search
//   R2: data len
//   R8: address to put result
TEXT indexbytebody<>(SB),NOSPLIT,$0
	// Core algorithm:
	// For each 32-byte chunk we calculate a 64-bit syndrome value,
	// with two bits per byte. For each tuple, bit 0 is set if the
	// relevant byte matched the requested character and bit 1 is
	// not used (faster than using a 32bit syndrome). Since the bits
	// in the syndrome reflect exactly the order in which things occur
	// in the original string, counting trailing zeros allows to
	// identify exactly which byte has matched.

	CBZ	R2, fail
	MOVD	R0, R11
	// Magic constant 0x40100401 allows us to identify
	// which lane matches the requested byte.
	// 0x40100401 = ((1<<0) + (4<<8) + (16<<16) + (64<<24))
	// Different bytes have different bit masks (i.e: 1, 4, 16, 64)
	MOVD	$0x40100401, R5
	VMOV	R1, V0.B16
	// Work with aligned 32-byte chunks
	BIC	$0x1f, R0, R3
	VMOV	R5, V5.S4
	ANDS	$0x1f, R0, R9
	AND	$0x1f, R2, R10
	BEQ	loop

	// Input string is not 32-byte aligned. We calculate the
	// syndrome value for the aligned 32 bytes block containing
	// the first bytes and mask off the irrelevant part.
	VLD1.P	(R3), [V1.B16, V2.B16]
	SUB	$0x20, R9, R4
	ADDS	R4, R2, R2
	VCMEQ	V0.B16, V1.B16, V3.B16
	VCMEQ	V0.B16, V2.B16, V4.B16
	VAND	V5.B16, V3.B16, V3.B16
	VAND	V5.B16, V4.B16, V4.B16
	VADDP	V4.B16, V3.B16, V6.B16	// 256->128
	VADDP	V6.B16, V6.B16, V6.B16	// 128->64
	VMOV	V6.D[0], R6
	// Clear the irrelevant lower bits
	LSL	$1, R9, R4
	LSR	R4, R6, R6
	LSL	R4, R6, R6
	// The first block can also be the last
	BLS	masklast
	// Have we found something already?
	CBNZ	R6, tail

loop:
	VLD1.P	(R3), [V1.B16, V2.B16]
	SUBS	$0x20, R2, R2
	VCMEQ	V0.B16, V1.B16, V3.B16
	VCMEQ	V0.B16, V2.B16, V4.B16
	// If we're out of data we finish regardless of the result
	BLS	end
	// Use a fast check for the termination condition
	VORR	V4.B16, V3.B16, V6.B16
	VADDP	V6.D2, V6.D2, V6.D2
	VMOV	V6.D[0], R6
	// We're not out of data, loop if we haven't found the character
	CBZ	R6, loop

end:
	// Termination condition found, let's calculate the syndrome value
	VAND	V5.B16, V3.B16, V3.B16
	VAND	V5.B16, V4.B16, V4.B16
	VADDP	V4.B16, V3.B16, V6.B16
	VADDP	V6.B16, V6.B16, V6.B16
	VMOV	V6.D[0], R6
	// Only do the clear for the last possible block with less than 32 bytes
	// Condition flags come from SUBS in the loop
	BHS	tail

masklast:
	// Clear the irrelevant upper bits
	ADD	R9, R10, R4
	AND	$0x1f, R4, R4
	SUB	$0x20, R4, R4
	NEG	R4<<1, R4
	LSL	R4, R6, R6
	LSR	R4, R6, R6

tail:
	// Check that we have found a character
	CBZ	R6, fail
	// Count the trailing zeros using bit reversing
	RBIT	R6, R6
	// Compensate the last post-increment
	SUB	$0x20, R3, R3
	// And count the leading zeros
	CLZ	R6, R6
	// R6 is twice the offset into the fragment
	ADD	R6>>1, R3, R0
	// Compute the offset result
	SUB	R11, R0, R0
	MOVD	R0, (R8)
	RET

fail:
	MOVD	$-1, R0
	MOVD	R0, (R8)
	RET

View File

@@ -0,0 +1,29 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Avoid IndexByte and IndexByteString on Plan 9 because it uses
// SSE instructions on x86 machines, and those are classified as
// floating point instructions, which are illegal in a note handler.
//go:build !386 && (!amd64 || plan9) && !s390x && !arm && !arm64 && !loong64 && !ppc64 && !ppc64le && !mips && !mipsle && !mips64 && !mips64le && !riscv64 && !wasm
package bytealg
// IndexByte returns the index of the first instance of c in b,
// or -1 if c is not present in b.
func IndexByte(b []byte, c byte) int {
	for i := 0; i < len(b); i++ {
		if b[i] == c {
			return i
		}
	}
	return -1
}
// IndexByteString returns the index of the first instance of c in s,
// or -1 if c is not present in s.
func IndexByteString(s string, c byte) int {
	i := 0
	for i < len(s) {
		if s[i] == c {
			return i
		}
		i++
	}
	return -1
}

View File

@@ -0,0 +1,52 @@
// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "go_asm.h"
#include "textflag.h"
// IndexByte([]byte, byte) int for loong64 under the internal ABI.
// Predecrements R4 so the loop can increment first and compare, keeping
// the loop body to four instructions.
TEXT ·IndexByte<ABIInternal>(SB),NOSPLIT,$0-40
	// R4 = b_base
	// R5 = b_len
	// R6 = b_cap (unused)
	// R7 = byte to find
	AND	$0xff, R7	// keep only the low byte of c
	MOVV	R4, R6		// store base for later
	ADDV	R4, R5		// end
	ADDV	$-1, R4

	PCALIGN	$16
loop:
	ADDV	$1, R4
	BEQ	R4, R5, notfound
	MOVBU	(R4), R8
	BNE	R7, R8, loop

	SUBV	R6, R4		// remove base
	RET

notfound:
	MOVV	$-1, R4
	RET
// IndexByteString(string, byte) int for loong64 under the internal ABI;
// same scan as IndexByte, with the byte argument arriving in R6.
TEXT ·IndexByteString<ABIInternal>(SB),NOSPLIT,$0-32
	// R4 = s_base
	// R5 = s_len
	// R6 = byte to find
	MOVV	R4, R7		// store base for later
	ADDV	R4, R5		// end
	ADDV	$-1, R4

	PCALIGN	$16
loop:
	ADDV	$1, R4
	BEQ	R4, R5, notfound
	MOVBU	(R4), R8
	BNE	R6, R8, loop

	SUBV	R7, R4		// remove base
	RET

notfound:
	MOVV	$-1, R4
	RET

View File

@@ -0,0 +1,54 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build mips64 || mips64le
#include "go_asm.h"
#include "textflag.h"
// IndexByte([]byte, byte) int for mips64: byte-at-a-time scan with a
// predecremented pointer.
TEXT ·IndexByte(SB),NOSPLIT,$0-40
	MOVV	b_base+0(FP), R1
	MOVV	b_len+8(FP), R2
	MOVBU	c+24(FP), R3	// byte to find
	MOVV	R1, R4		// store base for later
	ADDV	R1, R2		// end
	ADDV	$-1, R1

loop:
	ADDV	$1, R1
	BEQ	R1, R2, notfound
	MOVBU	(R1), R5
	BNE	R3, R5, loop

	SUBV	R4, R1		// remove base
	MOVV	R1, ret+32(FP)
	RET

notfound:
	MOVV	$-1, R1
	MOVV	R1, ret+32(FP)
	RET
// IndexByteString(string, byte) int for mips64; same scan as IndexByte
// with string-header argument offsets.
TEXT ·IndexByteString(SB),NOSPLIT,$0-32
	MOVV	s_base+0(FP), R1
	MOVV	s_len+8(FP), R2
	MOVBU	c+16(FP), R3	// byte to find
	MOVV	R1, R4		// store base for later
	ADDV	R1, R2		// end
	ADDV	$-1, R1

loop:
	ADDV	$1, R1
	BEQ	R1, R2, notfound
	MOVBU	(R1), R5
	BNE	R3, R5, loop

	SUBV	R4, R1		// remove base
	MOVV	R1, ret+24(FP)
	RET

notfound:
	MOVV	$-1, R1
	MOVV	R1, ret+24(FP)
	RET

View File

@@ -0,0 +1,52 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build mips || mipsle
#include "go_asm.h"
#include "textflag.h"
// IndexByte([]byte, byte) int for mips: scan bytes; R4 holds base+1 so
// the post-increment overshoot cancels when computing the index.
TEXT ·IndexByte(SB),NOSPLIT,$0-20
	MOVW	b_base+0(FP), R1
	MOVW	b_len+4(FP), R2
	MOVBU	c+12(FP), R3	// byte to find
	ADDU	$1, R1, R4	// store base+1 for later
	ADDU	R1, R2		// end

loop:
	BEQ	R1, R2, notfound
	MOVBU	(R1), R5
	ADDU	$1, R1
	BNE	R3, R5, loop

	SUBU	R4, R1	// R1 will be one beyond the position we want so remove (base+1)
	MOVW	R1, ret+16(FP)
	RET

notfound:
	MOVW	$-1, R1
	MOVW	R1, ret+16(FP)
	RET
// IndexByteString(string, byte) int for mips; same base+1 trick as
// IndexByte, with string-header argument offsets.
TEXT ·IndexByteString(SB),NOSPLIT,$0-16
	MOVW	s_base+0(FP), R1
	MOVW	s_len+4(FP), R2
	MOVBU	c+8(FP), R3	// byte to find
	ADDU	$1, R1, R4	// store base+1 for later
	ADDU	R1, R2		// end

loop:
	BEQ	R1, R2, notfound
	MOVBU	(R1), R5
	ADDU	$1, R1
	BNE	R3, R5, loop

	SUBU	R4, R1	// remove (base+1)
	MOVW	R1, ret+12(FP)
	RET

notfound:
	MOVW	$-1, R1
	MOVW	R1, ret+12(FP)
	RET

View File

@@ -0,0 +1,13 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build 386 || (amd64 && !plan9) || s390x || arm || arm64 || loong64 || ppc64 || ppc64le || mips || mipsle || mips64 || mips64le || riscv64 || wasm
package bytealg
// IndexByte returns the index of the first instance of c in b,
// or -1 if c is not present in b. Implemented in assembly.
//
//go:noescape
func IndexByte(b []byte, c byte) int
// IndexByteString returns the index of the first instance of c in s,
// or -1 if c is not present in s. Implemented in assembly.
//
//go:noescape
func IndexByteString(s string, c byte) int

View File

@@ -0,0 +1,314 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build ppc64 || ppc64le
#include "go_asm.h"
#include "textflag.h"
// IndexByte([]byte, byte) int for ppc64x under the internal ABI.
TEXT ·IndexByte<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-40
	// R3 = byte array pointer
	// R4 = length
	// NOTE(review): the byte argument arrives in R6 under the internal ABI
	// (R5 carries the slice capacity); indexbytebody expects it in R5.
	MOVD	R6, R5		// R5 = byte
	BR	indexbytebody<>(SB)
// IndexByteString(string, byte) int for ppc64x under the internal ABI.
// Arguments already sit in the registers indexbytebody expects, so this
// is a bare tail-branch.
TEXT ·IndexByteString<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-32
	// R3 = string
	// R4 = length
	// R5 = byte
	BR	indexbytebody<>(SB)
#ifndef GOPPC64_power9
// Pre-power9 path: constant used with VBPERMQ to reduce a byte-compare
// result into an ordered bit mask (presumably selecting one bit per
// byte; layout differs by endianness — verify against VBPERMQ usage in
// indexbytebody below).
#ifdef GOARCH_ppc64le
DATA indexbytevbperm<>+0(SB)/8, $0x3830282018100800
DATA indexbytevbperm<>+8(SB)/8, $0x7870686058504840
#else
DATA indexbytevbperm<>+0(SB)/8, $0x0008101820283038
DATA indexbytevbperm<>+8(SB)/8, $0x4048505860687078
#endif
GLOBL indexbytevbperm<>+0(SB), RODATA, $16
#endif

// Some operations are endian specific, choose the correct opcode based on GOARCH.
// Note, _VCZBEBB is only available on power9 and newer.
#ifdef GOARCH_ppc64le
#define _LDBEX	MOVDBR
#define _LWBEX	MOVWBR
#define _LHBEX	MOVHBR
#define _VCZBEBB VCTZLSBB
#else
#define _LDBEX	MOVD
#define _LWBEX	MOVW
#define _LHBEX	MOVH
#define _VCZBEBB VCLZLSBB
#endif
// indexbytebody is the shared implementation of IndexByte and
// IndexByteString for ppc64/ppc64le. Strategy: 64-byte vector loop
// for long inputs, vector paths for lengths 16-63, and overlapping
// scalar big-endian loads for lengths < 16.
//
// R3 = addr of string
// R4 = len of string
// R5 = byte to find
// On exit:
// R3 = return value
TEXT indexbytebody<>(SB),NOSPLIT|NOFRAME,$0-0
	CMPU R4,$32
#ifndef GOPPC64_power9
	// Load VBPERMQ constant to reduce compare into an ordered bit mask.
	MOVD $indexbytevbperm<>+00(SB),R16
	LXVD2X (R16),V0 // Set up swap string
#endif
	MTVRD R5,V1
	VSPLTB $7,V1,V1 // Replicate byte across V1
	BLT cmp16 // Jump to the small string case if it's <32 bytes.
	CMP R4,$64,CR1
	MOVD $16,R11
	MOVD R3,R8
	BLT CR1,cmp32 // Special case for length 32 - 63
	MOVD $32,R12
	MOVD $48,R6
	RLDICR $0,R4,$63-6,R9 // R9 = len &^ 63
	ADD R3,R9,R9 // R9 = &s[len &^ 63]
	ANDCC $63,R4 // (len &= 63) cmp 0.
	PCALIGN $16
loop64:
	LXVD2X (R0)(R8),V2 // Scan 64 bytes at a time, starting at &s[0]
	VCMPEQUBCC V2,V1,V6
	BNE CR6,foundat0 // Match found at R8, jump out
	LXVD2X (R11)(R8),V2
	VCMPEQUBCC V2,V1,V6
	BNE CR6,foundat1 // Match found at R8+16 bytes, jump out
	LXVD2X (R12)(R8),V2
	VCMPEQUBCC V2,V1,V6
	BNE CR6,foundat2 // Match found at R8+32 bytes, jump out
	LXVD2X (R6)(R8),V2
	VCMPEQUBCC V2,V1,V6
	BNE CR6,foundat3 // Match found at R8+48 bytes, jump out
	ADD $64,R8
	CMPU R8,R9,CR1
	BNE CR1,loop64 // R8 != &s[len &^ 63]?
	PCALIGN $32
	BEQ notfound // Is tail length 0? CR0 is set before entering loop64.
	CMP R4,$32 // Tail length >= 32, use cmp32 path.
	CMP R4,$16,CR1
	BGE cmp32
	// Tail is 1-31 bytes; the final 16-byte check overlaps the
	// already-scanned region, which is safe since no match was found.
	ADD R8,R4,R9
	ADD $-16,R9
	BLE CR1,cmp64_tail_gt0
cmp64_tail_gt16: // Tail length 17 - 32
	LXVD2X (R0)(R8),V2
	VCMPEQUBCC V2,V1,V6
	BNE CR6,foundat0
cmp64_tail_gt0: // Tail length 1 - 16
	MOVD R9,R8
	LXVD2X (R0)(R9),V2
	VCMPEQUBCC V2,V1,V6
	BNE CR6,foundat0
	BR notfound
cmp32: // Length 32 - 63
	// Bytes 0 - 15
	LXVD2X (R0)(R8),V2
	VCMPEQUBCC V2,V1,V6
	BNE CR6,foundat0
	// Bytes 16 - 31
	LXVD2X (R8)(R11),V2
	VCMPEQUBCC V2,V1,V6
	BNE CR6,foundat1 // Match found at R8+16 bytes, jump out
	BEQ notfound // Is length <= 32? (CR0 holds this comparison on entry to cmp32)
	CMP R4,$48
	ADD R4,R8,R9 // Compute &s[len(s)-16]
	ADD $32,R8,R8
	ADD $-16,R9,R9
	ISEL CR0GT,R8,R9,R8 // R8 = len(s) <= 48 ? R9 : R8
	// Bytes 33 - 47
	LXVD2X (R0)(R8),V2
	VCMPEQUBCC V2,V1,V6
	BNE CR6,foundat0 // match found at R8+32 bytes, jump out
	BLE notfound
	// Bytes 48 - 63
	MOVD R9,R8 // R9 holds the final check.
	LXVD2X (R0)(R9),V2
	VCMPEQUBCC V2,V1,V6
	BNE CR6,foundat0 // Match found at R8+48 bytes, jump out
	BR notfound

// If ISA 3.0 instructions are unavailable, we need to account for the extra 16 added by CNTLZW.
#ifndef GOPPC64_power9
#define ADJUST_FOR_CNTLZW -16
#else
#define ADJUST_FOR_CNTLZW 0
#endif

// Now, find the index of the 16B vector the match was discovered in. If CNTLZW is used
// to determine the offset into the 16B vector, it will overcount by 16. Account for it here.
foundat3:
	SUB R3,R8,R3
	ADD $48+ADJUST_FOR_CNTLZW,R3
	BR vfound
foundat2:
	SUB R3,R8,R3
	ADD $32+ADJUST_FOR_CNTLZW,R3
	BR vfound
foundat1:
	SUB R3,R8,R3
	ADD $16+ADJUST_FOR_CNTLZW,R3
	BR vfound
foundat0:
	SUB R3,R8,R3
	ADD $0+ADJUST_FOR_CNTLZW,R3
vfound:
	// Map equal values into a 16 bit value with earlier matches setting higher bits.
#ifndef GOPPC64_power9
	VBPERMQ V6,V0,V6
	MFVRD V6,R4
	CNTLZW R4,R4
#else
#ifdef GOARCH_ppc64le
	// Put the value back into LE ordering by swapping doublewords.
	XXPERMDI V6,V6,$2,V6
#endif
	_VCZBEBB V6,R4
#endif
	ADD R3,R4,R3
	RET
cmp16: // Length 16 - 31
	CMPU R4,$16
	ADD R4,R3,R9
	BLT cmp8
	ADD $-16,R9,R9 // &s[len(s)-16]
	// Bytes 0 - 15
	LXVD2X (R0)(R3),V2
	VCMPEQUBCC V2,V1,V6
	MOVD R3,R8
	BNE CR6,foundat0 // Match found at R8+32 bytes, jump out
	BEQ notfound
	// Bytes 16 - 30
	MOVD R9,R8 // R9 holds the final check.
	LXVD2X (R0)(R9),V2
	VCMPEQUBCC V2,V1,V6
	BNE CR6,foundat0 // Match found at R8+48 bytes, jump out
	BR notfound
cmp8: // Length 8 - 15
#ifdef GOPPC64_power10
	// Load all the bytes into a single VSR in BE order.
	SLD $56,R4,R5
	LXVLL R3,R5,V2
	// Compare and count the number which don't match.
	VCMPEQUB V2,V1,V6
	VCLZLSBB V6,R3
	// If count is the number of bytes, or more. No matches are found.
	CMPU R3,R4
	MOVD $-1,R5
	// Otherwise, the count is the index of the first match.
	ISEL CR0LT,R3,R5,R3
	RET
#else
	RLDIMI $8,R5,$48,R5 // Replicating the byte across the register.
	RLDIMI $16,R5,$32,R5
	RLDIMI $32,R5,$0,R5
	CMPU R4,$8
	BLT cmp4
	// Check the first and last 8 bytes (they may overlap) with CMPB,
	// which sets matching bytes to 0xFF.
	MOVD $-8,R11
	ADD $-8,R4,R4
	_LDBEX (R0)(R3),R10
	_LDBEX (R11)(R9),R11
	CMPB R10,R5,R10
	CMPB R11,R5,R11
	CMPU R10,$0
	CMPU R11,$0,CR1
	CNTLZD R10,R10
	CNTLZD R11,R11
	SRD $3,R10,R3
	SRD $3,R11,R11
	BNE found
	ADD R4,R11,R4
	MOVD $-1,R3
	ISEL CR1EQ,R3,R4,R3
	RET
cmp4: // Length 4 - 7
	CMPU R4,$4
	BLT cmp2
	MOVD $-4,R11
	ADD $-4,R4,R4
	_LWBEX (R0)(R3),R10
	_LWBEX (R11)(R9),R11
	CMPB R10,R5,R10
	CMPB R11,R5,R11
	CNTLZW R10,R10
	CNTLZW R11,R11
	CMPU R10,$32
	CMPU R11,$32,CR1
	SRD $3,R10,R3
	SRD $3,R11,R11
	BNE found
	ADD R4,R11,R4
	MOVD $-1,R3
	ISEL CR1EQ,R3,R4,R3
	RET
cmp2: // Length 2 - 3
	CMPU R4,$2
	BLT cmp1
	_LHBEX (R0)(R3),R10
	CMPB R10,R5,R10
	SLDCC $48,R10,R10
	CNTLZD R10,R10
	SRD $3,R10,R3
	BNE found
cmp1: // Length 1
	MOVD $-1,R3
	ANDCC $1,R4,R31
	BEQ found
	MOVBZ -1(R9),R10
	CMPB R10,R5,R10
	ANDCC $1,R10
	ADD $-1,R4
	ISEL CR0EQ,R3,R4,R3
found:
	RET
#endif
notfound:
	MOVD $-1,R3
	RET

View File

@@ -0,0 +1,51 @@
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "go_asm.h"
#include "textflag.h"
// func IndexByte(b []byte, c byte) int
// Simple byte-at-a-time scan.
TEXT ·IndexByte<ABIInternal>(SB),NOSPLIT,$0-40
	// X10 = b_base
	// X11 = b_len
	// X12 = b_cap (unused)
	// X13 = byte to find
	AND $0xff, X13 // mask argument to a single byte
	MOV X10, X12 // store base for later
	ADD X10, X11 // end
	SUB $1, X10 // pre-decrement so the loop advances first
loop:
	ADD $1, X10
	BEQ X10, X11, notfound
	MOVBU (X10), X14
	BNE X13, X14, loop
	SUB X12, X10 // remove base
	RET
notfound:
	MOV $-1, X10
	RET
// func IndexByteString(s string, c byte) int
// Simple byte-at-a-time scan.
TEXT ·IndexByteString<ABIInternal>(SB),NOSPLIT,$0-32
	// X10 = s_base
	// X11 = s_len
	// X12 = byte to find
	AND $0xff, X12 // mask argument to a single byte
	MOV X10, X13 // store base for later
	ADD X10, X11 // end
	SUB $1, X10 // pre-decrement so the loop advances first
loop:
	ADD $1, X10
	BEQ X10, X11, notfound
	MOVBU (X10), X14
	BNE X12, X14, loop
	SUB X13, X10 // remove base
	RET
notfound:
	MOV $-1, X10
	RET

View File

@@ -0,0 +1,108 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "go_asm.h"
#include "textflag.h"
// func IndexByte(b []byte, c byte) int
// Loads the arguments and tail-jumps to the shared body,
// which writes the result through the &ret pointer in R2.
TEXT ·IndexByte(SB),NOSPLIT|NOFRAME,$0-40
	MOVD b_base+0(FP), R3// b_base => R3
	MOVD b_len+8(FP), R4 // b_len => R4
	MOVBZ c+24(FP), R5 // c => R5
	MOVD $ret+32(FP), R2 // &ret => R2
	BR indexbytebody<>(SB)
// func IndexByteString(s string, c byte) int
// Loads the arguments and tail-jumps to the shared body,
// which writes the result through the &ret pointer in R2.
TEXT ·IndexByteString(SB),NOSPLIT|NOFRAME,$0-32
	MOVD s_base+0(FP), R3// s_base => R3
	MOVD s_len+8(FP), R4 // s_len => R4
	MOVBZ c+16(FP), R5 // c => R5
	MOVD $ret+24(FP), R2 // &ret => R2
	BR indexbytebody<>(SB)
// indexbytebody is the shared implementation of IndexByte and
// IndexByteString for s390x. Short inputs use a byte loop; longer
// inputs use the vector facility when available, otherwise the
// SRST (search string) instruction.
//
// input:
// R3: s
// R4: s_len
// R5: c -- byte sought
// R2: &ret -- address to put index into
TEXT indexbytebody<>(SB),NOSPLIT|NOFRAME,$0
	CMPBEQ R4, $0, notfound // empty input: no match possible
	MOVD R3, R6 // store base for later
	ADD R3, R4, R8 // the address after the end of the string
	//if the length is small, use loop; otherwise, use vector or srst search
	CMPBGE R4, $16, large
residual:
	CMPBEQ R3, R8, notfound
	MOVBZ 0(R3), R7
	LA 1(R3), R3
	CMPBNE R7, R5, residual
found:
	// R3 was advanced past the match; back up one after removing base.
	SUB R6, R3
	SUB $1, R3
	MOVD R3, 0(R2)
	RET
notfound:
	MOVD $-1, 0(R2)
	RET
large:
	// NOTE: the symbol below names internal/cpu.S390X; the package
	// path separator is the Unicode division slash required by the
	// Go assembler for "/" in symbol names.
	MOVBZ internal∕cpu·S390X+const_offsetS390xHasVX(SB), R1
	CMPBNE R1, $0, vectorimpl
srstimpl: // no vector facility
	MOVBZ R5, R0 // c needs to be in R0, leave until last minute as currently R0 is expected to be 0
srstloop:
	WORD $0xB25E0083 // srst %r8, %r3 (search the range [R3, R8))
	BVS srstloop // interrupted - continue
	BGT notfoundr0
foundr0:
	XOR R0, R0 // reset R0
	SUB R6, R8 // remove base
	MOVD R8, 0(R2)
	RET
notfoundr0:
	XOR R0, R0 // reset R0
	MOVD $-1, 0(R2)
	RET
vectorimpl:
	//if the address is not 16byte aligned, use loop for the header
	MOVD R3, R8
	AND $15, R8
	CMPBGT R8, $0, notaligned
aligned:
	ADD R6, R4, R8
	MOVD R8, R7
	AND $-16, R7
	// replicate c across V17
	VLVGB $0, R5, V19
	VREPB $0, V19, V17
vectorloop:
	CMPBGE R3, R7, residual
	VL 0(R3), V16 // load string to be searched into V16
	ADD $16, R3
	VFEEBS V16, V17, V18 // search V17 in V16 and set conditional code accordingly
	BVS vectorloop
	// when vector search found c in the string
	VLGVB $7, V18, R7 // load 7th element of V18 containing index into R7
	SUB $16, R3
	SUB R6, R3
	ADD R3, R7
	MOVD R7, 0(R2)
	RET
notaligned:
	MOVD R3, R8
	AND $-16, R8
	ADD $16, R8
notalignedloop:
	CMPBEQ R3, R8, aligned
	MOVBZ 0(R3), R7
	LA 1(R3), R3
	CMPBNE R7, R5, notalignedloop
	BR found

View File

@@ -0,0 +1,195 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "go_asm.h"
#include "textflag.h"
// func IndexByte(b []byte, c byte) int
TEXT ·IndexByte(SB), NOSPLIT, $0-40
	// Call memchr(b_base, c, b_len); it returns a pointer to the
	// first match or 0 if there is none.
	I64Load b_base+0(FP)
	I32WrapI64
	I32Load8U c+24(FP)
	I64Load b_len+8(FP)
	I32WrapI64
	Call memchr<>(SB)
	I64ExtendI32U
	Set R0

	// ret = (R0 == 0) ? -1 : R0 - b_base
	Get SP
	I64Const $-1
	Get R0
	I64Load b_base+0(FP)
	I64Sub
	Get R0
	I64Eqz $0
	Select
	I64Store ret+32(FP)
	RET
// func IndexByteString(s string, c byte) int
TEXT ·IndexByteString(SB), NOSPLIT, $0-32
	// Call memchr(s_base, c, s_len); it returns a pointer to the
	// first match or 0 if there is none.
	Get SP
	I64Load s_base+0(FP)
	I32WrapI64
	I32Load8U c+16(FP)
	I64Load s_len+8(FP)
	I32WrapI64
	Call memchr<>(SB)
	I64ExtendI32U
	Set R0

	// ret = (R0 == 0) ? -1 : R0 - s_base
	I64Const $-1
	Get R0
	I64Load s_base+0(FP)
	I64Sub
	Get R0
	I64Eqz $0
	Select
	I64Store ret+24(FP)
	RET
// memchr scans the R2 bytes starting at address R0 for the byte
// value R1. It returns a pointer to the first occurrence, or 0
// if the byte is not present.
//
// initially compiled with emscripten and then modified over time.
// params:
// R0: s
// R1: c
// R2: len
// ret: index
TEXT memchr<>(SB), NOSPLIT, $0
	Get R1
	Set R4
	Block
	Block
	// Head: scan one byte at a time until R0 is 4-byte aligned,
	// the length is exhausted, or a match is found.
	Get R2
	I32Const $0
	I32Ne
	Tee R3
	Get R0
	I32Const $3
	I32And
	I32Const $0
	I32Ne
	I32And
	If
	Loop
	Get R0
	I32Load8U $0
	Get R1
	I32Eq
	BrIf $2
	Get R2
	I32Const $-1
	I32Add
	Tee R2
	I32Const $0
	I32Ne
	Tee R3
	Get R0
	I32Const $1
	I32Add
	Tee R0
	I32Const $3
	I32And
	I32Const $0
	I32Ne
	I32And
	BrIf $0
	End
	End
	Get R3
	BrIf $0
	I32Const $0
	Set R1
	Br $1
	End
	Get R0
	I32Load8U $0
	Get R4
	Tee R3
	I32Eq
	If
	Get R2
	Set R1
	Else
	// Replicate c into all four bytes of R4 (c * 0x01010101).
	Get R4
	I32Const $16843009
	I32Mul
	Set R4
	Block
	Block
	Get R2
	I32Const $3
	I32GtU
	If
	Get R2
	Set R1
	// Word-at-a-time scan. For x = word ^ repeated-c, the SWAR test
	// ((x & 0x80808080) ^ 0x80808080) & (x - 0x01010101) is nonzero
	// iff some byte of x is zero, i.e. some byte matched c.
	Loop
	Get R0
	I32Load $0
	Get R4
	I32Xor
	Tee R2
	I32Const $-2139062144
	I32And
	I32Const $-2139062144
	I32Xor
	Get R2
	I32Const $-16843009
	I32Add
	I32And
	I32Eqz
	If
	Get R0
	I32Const $4
	I32Add
	Set R0
	Get R1
	I32Const $-4
	I32Add
	Tee R1
	I32Const $3
	I32GtU
	BrIf $1
	Br $3
	End
	End
	Else
	Get R2
	Set R1
	Br $1
	End
	Br $1
	End
	Get R1
	I32Eqz
	If
	I32Const $0
	Set R1
	Br $3
	End
	End
	// Tail: the matching word (or the last <4 bytes) is located
	// byte by byte.
	Loop
	Get R0
	I32Load8U $0
	Get R3
	I32Eq
	BrIf $2
	Get R0
	I32Const $1
	I32Add
	Set R0
	Get R1
	I32Const $-1
	I32Add
	Tee R1
	BrIf $0
	I32Const $0
	Set R1
	End
	End
	End
	// Return R0 (the match address) if the scan stopped on a match
	// (R1 != 0), otherwise 0.
	Get R0
	I32Const $0
	Get R1
	Select
	Return

View File

@@ -0,0 +1,23 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package bytealg
// LastIndexByte returns the index of the last occurrence of c in s,
// or -1 if c does not appear in s.
func LastIndexByte(s []byte, c byte) int {
	for i := len(s); i > 0; {
		i--
		if s[i] == c {
			return i
		}
	}
	return -1
}
// LastIndexByteString returns the index of the last occurrence of c
// in s, or -1 if c does not appear in s.
func LastIndexByteString(s string, c byte) int {
	for i := len(s); i > 0; {
		i--
		if s[i] == c {
			return i
		}
	}
	return -1
}

View File

@@ -0,0 +1,149 @@
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package byteorder provides functions for decoding and encoding
// little and big endian integer types from/to byte slices.
package byteorder
// LeUint16 decodes the first two bytes of b as a little-endian uint16.
func LeUint16(b []byte) uint16 {
	_ = b[1] // bounds check hint to compiler; see golang.org/issue/14808
	lo, hi := uint16(b[0]), uint16(b[1])
	return hi<<8 | lo
}
// LePutUint16 stores v into b[0:2] in little-endian order.
func LePutUint16(b []byte, v uint16) {
	_ = b[1] // early bounds check to guarantee safety of writes below
	b[1] = byte(v >> 8)
	b[0] = byte(v)
}
// LeAppendUint16 appends the little-endian encoding of v to b
// and returns the extended slice.
func LeAppendUint16(b []byte, v uint16) []byte {
	b = append(b, byte(v))
	return append(b, byte(v>>8))
}
// LeUint32 decodes the first four bytes of b as a little-endian uint32.
func LeUint32(b []byte) uint32 {
	_ = b[3] // bounds check hint to compiler; see golang.org/issue/14808
	var x uint32
	for i := 3; i >= 0; i-- {
		x = x<<8 | uint32(b[i])
	}
	return x
}
// LePutUint32 stores v into b[0:4] in little-endian order.
func LePutUint32(b []byte, v uint32) {
	_ = b[3] // early bounds check to guarantee safety of writes below
	for i := 0; i < 4; i++ {
		b[i] = byte(v >> (8 * i))
	}
}
// LeAppendUint32 appends the little-endian encoding of v to b
// and returns the extended slice.
func LeAppendUint32(b []byte, v uint32) []byte {
	for shift := 0; shift < 32; shift += 8 {
		b = append(b, byte(v>>shift))
	}
	return b
}
// LeUint64 decodes the first eight bytes of b as a little-endian uint64.
func LeUint64(b []byte) uint64 {
	_ = b[7] // bounds check hint to compiler; see golang.org/issue/14808
	var x uint64
	for i := 7; i >= 0; i-- {
		x = x<<8 | uint64(b[i])
	}
	return x
}
// LePutUint64 stores v into b[0:8] in little-endian order.
func LePutUint64(b []byte, v uint64) {
	_ = b[7] // early bounds check to guarantee safety of writes below
	for i := 0; i < 8; i++ {
		b[i] = byte(v >> (8 * i))
	}
}
// LeAppendUint64 appends the little-endian encoding of v to b
// and returns the extended slice.
func LeAppendUint64(b []byte, v uint64) []byte {
	for shift := 0; shift < 64; shift += 8 {
		b = append(b, byte(v>>shift))
	}
	return b
}
// BeUint16 decodes the first two bytes of b as a big-endian uint16.
func BeUint16(b []byte) uint16 {
	_ = b[1] // bounds check hint to compiler; see golang.org/issue/14808
	hi, lo := uint16(b[0]), uint16(b[1])
	return hi<<8 | lo
}
// BePutUint16 stores v into b[0:2] in big-endian order.
func BePutUint16(b []byte, v uint16) {
	_ = b[1] // early bounds check to guarantee safety of writes below
	b[1] = byte(v)
	b[0] = byte(v >> 8)
}
// BeAppendUint16 appends the big-endian encoding of v to b
// and returns the extended slice.
func BeAppendUint16(b []byte, v uint16) []byte {
	b = append(b, byte(v>>8))
	return append(b, byte(v))
}
// BeUint32 decodes the first four bytes of b as a big-endian uint32.
func BeUint32(b []byte) uint32 {
	_ = b[3] // bounds check hint to compiler; see golang.org/issue/14808
	var x uint32
	for _, c := range b[:4] {
		x = x<<8 | uint32(c)
	}
	return x
}
// BePutUint32 stores v into b[0:4] in big-endian order.
func BePutUint32(b []byte, v uint32) {
	_ = b[3] // early bounds check to guarantee safety of writes below
	for i := 0; i < 4; i++ {
		b[i] = byte(v >> (8 * (3 - i)))
	}
}
// BeAppendUint32 appends the big-endian encoding of v to b
// and returns the extended slice.
func BeAppendUint32(b []byte, v uint32) []byte {
	for shift := 24; shift >= 0; shift -= 8 {
		b = append(b, byte(v>>shift))
	}
	return b
}
// BeUint64 decodes the first eight bytes of b as a big-endian uint64.
func BeUint64(b []byte) uint64 {
	_ = b[7] // bounds check hint to compiler; see golang.org/issue/14808
	var x uint64
	for _, c := range b[:8] {
		x = x<<8 | uint64(c)
	}
	return x
}
// BePutUint64 stores v into b[0:8] in big-endian order.
func BePutUint64(b []byte, v uint64) {
	_ = b[7] // early bounds check to guarantee safety of writes below
	for i := 0; i < 8; i++ {
		b[i] = byte(v >> (8 * (7 - i)))
	}
}
// BeAppendUint64 appends the big-endian encoding of v to b
// and returns the extended slice.
func BeAppendUint64(b []byte, v uint64) []byte {
	for shift := 56; shift >= 0; shift -= 8 {
		b = append(b, byte(v>>shift))
	}
	return b
}

72
src/internal/cfg/cfg.go Normal file
View File

@@ -0,0 +1,72 @@
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package cfg holds configuration shared by the Go command and internal/testenv.
// Definitions that don't need to be exposed outside of cmd/go should be in
// cmd/go/internal/cfg instead of this package.
package cfg
// KnownEnv is a list of environment variables that affect the operation
// of the Go command.
// The names are listed one per line, in sorted order
// (new entries should preserve the ordering).
const KnownEnv = `
AR
CC
CGO_CFLAGS
CGO_CFLAGS_ALLOW
CGO_CFLAGS_DISALLOW
CGO_CPPFLAGS
CGO_CPPFLAGS_ALLOW
CGO_CPPFLAGS_DISALLOW
CGO_CXXFLAGS
CGO_CXXFLAGS_ALLOW
CGO_CXXFLAGS_DISALLOW
CGO_ENABLED
CGO_FFLAGS
CGO_FFLAGS_ALLOW
CGO_FFLAGS_DISALLOW
CGO_LDFLAGS
CGO_LDFLAGS_ALLOW
CGO_LDFLAGS_DISALLOW
CXX
FC
GCCGO
GO111MODULE
GO386
GOAMD64
GOARCH
GOARM
GOARM64
GOBIN
GOCACHE
GOCACHEPROG
GOENV
GOEXE
GOEXPERIMENT
GOFLAGS
GOGCCFLAGS
GOHOSTARCH
GOHOSTOS
GOINSECURE
GOMIPS
GOMIPS64
GOMODCACHE
GONOPROXY
GONOSUMDB
GOOS
GOPATH
GOPPC64
GOPRIVATE
GOPROXY
GORISCV64
GOROOT
GOSUMDB
GOTMPDIR
GOTOOLCHAIN
GOTOOLDIR
GOVCS
GOWASM
GOWORK
GO_EXTLINK_ENABLED
PKG_CONFIG
`

View File

@@ -0,0 +1,160 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package chacha8rand implements a pseudorandom generator
// based on ChaCha8. It is used by both runtime and math/rand/v2
// and must have minimal dependencies.
package chacha8rand
import "internal/byteorder"
const (
	ctrInc = 4  // increment counter by 4 between block calls
	ctrMax = 16 // reseed when counter reaches 16
	chunk  = 32 // each chunk produced by block is 32 uint64s
	reseed = 4  // reseed with 4 words
)

// block is the chacha8rand block function. It fills blocks with
// output derived from seed and counter. It has no Go body here;
// the implementation is provided elsewhere (per-architecture
// assembly, or block_generic on systems without one).
func block(seed *[4]uint64, blocks *[32]uint64, counter uint32)
// A State holds the state for a single random generator.
// It must be used from one goroutine at a time.
// If used by multiple goroutines at a time, the goroutines
// may see the same random values, but the code will not
// crash or cause out-of-bounds memory accesses.
type State struct {
	buf  [32]uint64 // current block of generated values
	seed [4]uint64  // seed that produced buf
	i    uint32     // index in buf of next value to hand out
	n    uint32     // number of values in buf available for output
	c    uint32     // counter passed to block when buf was computed
}
// Next returns the next random value, along with a boolean
// indicating whether one was available.
// If one is not available, the caller should call Refill
// and then repeat the call to Next.
//
// Next is //go:nosplit to allow its use in the runtime
// with per-m data without holding the per-m lock.
//
//go:nosplit
func (s *State) Next() (uint64, bool) {
	i := s.i
	if i >= s.n {
		// Buffer exhausted; caller must Refill.
		return 0, false
	}
	s.i = i + 1
	return s.buf[i&31], true // i&31 eliminates bounds check
}
// Init seeds the State with the given seed value,
// interpreting the 32 bytes as four little-endian uint64 words.
func (s *State) Init(seed [32]byte) {
	s.Init64([4]uint64{
		byteorder.LeUint64(seed[0*8:]),
		byteorder.LeUint64(seed[1*8:]),
		byteorder.LeUint64(seed[2*8:]),
		byteorder.LeUint64(seed[3*8:]),
	})
}
// Init64 seeds the state with the given seed value.
// It computes the first block of output immediately,
// so a subsequent Next call will succeed.
func (s *State) Init64(seed [4]uint64) {
	s.seed = seed
	block(&s.seed, &s.buf, 0) // counter restarts at 0 for a fresh seed
	s.c = 0
	s.i = 0
	s.n = chunk
}
// Refill refills the state with more random values.
// After a call to Refill, an immediate call to Next will succeed
// (unless multiple goroutines are incorrectly sharing a state).
func (s *State) Refill() {
	s.c += ctrInc
	if s.c == ctrMax {
		// Reseed with generated uint64s for forward secrecy.
		// Normally this is done immediately after computing a block,
		// but we do it immediately before computing the next block,
		// to allow a much smaller serialized state (just the seed plus offset).
		// This gives a delayed benefit for the forward secrecy
		// (you can reconstruct the recent past given a memory dump),
		// which we deem acceptable in exchange for the reduced size.
		s.seed[0] = s.buf[len(s.buf)-reseed+0]
		s.seed[1] = s.buf[len(s.buf)-reseed+1]
		s.seed[2] = s.buf[len(s.buf)-reseed+2]
		s.seed[3] = s.buf[len(s.buf)-reseed+3]
		s.c = 0
	}
	block(&s.seed, &s.buf, s.c)
	s.i = 0
	s.n = uint32(len(s.buf))
	if s.c == ctrMax-ctrInc {
		// Last block before a reseed: hold back the final reseed
		// words so they are never handed out as output (they become
		// the next seed, per the assignment above).
		s.n = uint32(len(s.buf)) - reseed
	}
}
// Reseed reseeds the state with new random values.
// After a call to Reseed, any previously returned random values
// have been erased from the memory of the state and cannot be
// recovered.
func (s *State) Reseed() {
	var next [4]uint64
	for i := 0; i < len(next); i++ {
		x, ok := s.Next()
		for !ok {
			s.Refill()
			x, ok = s.Next()
		}
		next[i] = x
	}
	s.Init64(next)
}
// Marshal marshals the state into a byte slice.
// Marshal and Unmarshal are functions, not methods,
// so that they will not be linked into the runtime
// when it uses the State struct, since the runtime
// does not need these.
//
// The encoding is 48 bytes: the 8-byte magic "chacha8:",
// a big-endian count of values consumed so far, and the
// four seed words in little-endian order.
func Marshal(s *State) []byte {
	data := make([]byte, 6*8)
	copy(data, "chacha8:") // magic prefix
	// Values consumed from this seed: full chunks accounted for by
	// the counter, plus the offset into the current buffer.
	used := (s.c/ctrInc)*chunk + s.i
	byteorder.BePutUint64(data[1*8:], uint64(used))
	for i, seed := range s.seed {
		byteorder.LePutUint64(data[(2+i)*8:], seed)
	}
	return data
}
// errUnmarshalChaCha8 is the error reported by Unmarshal for
// data that is not a valid marshaled state.
type errUnmarshalChaCha8 struct{}

// Error implements the error interface.
func (*errUnmarshalChaCha8) Error() string {
	return "invalid ChaCha8 encoding"
}
// Unmarshal unmarshals the state from a byte slice
// produced by Marshal, recomputing the generated block
// so the state resumes at the serialized position.
func Unmarshal(s *State, data []byte) error {
	// Validate length and the 8-byte magic prefix.
	if len(data) != 6*8 || string(data[:8]) != "chacha8:" {
		return new(errUnmarshalChaCha8)
	}
	used := byteorder.BeUint64(data[1*8:])
	// Reject counts beyond what one seed can produce
	// (the final reseed words are never handed out).
	if used > (ctrMax/ctrInc)*chunk-reseed {
		return new(errUnmarshalChaCha8)
	}
	for i := range s.seed {
		s.seed[i] = byteorder.LeUint64(data[(2+i)*8:])
	}
	// Recompute the block containing the serialized position.
	s.c = ctrInc * (uint32(used) / chunk)
	block(&s.seed, &s.buf, s.c)
	s.i = uint32(used) % chunk
	s.n = chunk
	if s.c == ctrMax-ctrInc {
		// Last block before a reseed: hold back the reseed words.
		s.n = chunk - reseed
	}
	return nil
}

View File

@@ -0,0 +1,174 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "textflag.h"
// ChaCha8 is ChaCha with 8 rounds.
// See https://cr.yp.to/chacha/chacha-20080128.pdf.
// See chacha8_generic.go for additional details.
// ROL rotates the uint32s in register R left by N bits, using temporary T.
#define ROL(N, R, T) \
	MOVO R, T; PSLLL $(N), T; PSRLL $(32-(N)), R; PXOR T, R

// ROL16 rotates the uint32s in register R left by 16, using temporary T if needed.
// With GOAMD64_v2 a single PSHUFB byte shuffle performs the rotate.
#ifdef GOAMD64_v2
#define ROL16(R, T) PSHUFB ·rol16<>(SB), R
#else
#define ROL16(R, T) ROL(16, R, T)
#endif

// ROL8 rotates the uint32s in register R left by 8, using temporary T if needed.
#ifdef GOAMD64_v2
#define ROL8(R, T) PSHUFB ·rol8<>(SB), R
#else
#define ROL8(R, T) ROL(8, R, T)
#endif

// QR is the ChaCha quarter-round on A, B, C, and D. T is an available temporary.
#define QR(A, B, C, D, T) \
	PADDD B, A; PXOR A, D; ROL16(D, T); \
	PADDD D, C; PXOR C, B; MOVO B, T; PSLLL $12, T; PSRLL $20, B; PXOR T, B; \
	PADDD B, A; PXOR A, D; ROL8(D, T); \
	PADDD D, C; PXOR C, B; MOVO B, T; PSLLL $7, T; PSRLL $25, B; PXOR T, B

// REPLREG replicates the register R into 4 uint32s in XR.
#define REPLREG(R, XR) \
	MOVQ R, XR; \
	PSHUFD $0, XR, XR

// REPL replicates the uint32 constant val into 4 uint32s in XR. It smashes DX.
#define REPL(val, XR) \
	MOVL $val, DX; \
	REPLREG(DX, XR)

// SEED copies the off'th uint32 of the seed into the register XR,
// replicating it into all four stripes of the register.
#define SEED(off, reg, XR) \
	MOVL (4*off)(AX), reg; \
	REPLREG(reg, XR) \

// block runs 4 ChaCha8 block transformations in the four stripes of the X registers.
// See chacha8_generic.go for the interlaced layout.
// func block(seed *[8]uint32, blocks *[16][4]uint32, counter uint32)
TEXT ·block<ABIInternal>(SB), NOSPLIT, $16
	// seed in AX
	// blocks in BX
	// counter in CX

	// Load initial constants into top row.
	REPL(0x61707865, X0)
	REPL(0x3320646e, X1)
	REPL(0x79622d32, X2)
	REPL(0x6b206574, X3)

	// Load counter into bottom left cell.
	// Each stripe gets a different counter: 0, 1, 2, 3.
	// (PINSRD is not available in GOAMD64_v1,
	// so just do it in memory on all systems.
	// This is not on the critical path.)
	MOVL CX, 0(SP)
	INCL CX
	MOVL CX, 4(SP)
	INCL CX
	MOVL CX, 8(SP)
	INCL CX
	MOVL CX, 12(SP)
	MOVOU 0(SP), X12

	// Load seed words into next two rows and into DI, SI, R8..R13
	SEED(0, DI, X4)
	SEED(1, SI, X5)
	SEED(2, R8, X6)
	SEED(3, R9, X7)
	SEED(4, R10, X8)
	SEED(5, R11, X9)
	SEED(6, R12, X10)
	SEED(7, R13, X11)

	// Zeros for remaining two matrix entries.
	// We have just enough XMM registers to hold the state,
	// without one for the temporary, so we flush and restore
	// some values to and from memory to provide a temporary.
	// The initial temporary is X15, so zero its memory instead
	// of X15 itself.
	MOVL $0, DX
	MOVQ DX, X13
	MOVQ DX, X14
	MOVOU X14, (15*16)(BX)

	// 4 iterations. Each iteration is 8 quarter-rounds.
	MOVL $4, DX
loop:
	QR(X0, X4, X8, X12, X15)
	MOVOU X4, (4*16)(BX) // save X4
	QR(X1, X5, X9, X13, X15)
	MOVOU (15*16)(BX), X15 // reload X15; temp now X4
	QR(X2, X6, X10, X14, X4)
	QR(X3, X7, X11, X15, X4)

	QR(X0, X5, X10, X15, X4)
	MOVOU X15, (15*16)(BX) // save X15
	QR(X1, X6, X11, X12, X4)
	MOVOU (4*16)(BX), X4 // reload X4; temp now X15
	QR(X2, X7, X8, X13, X15)
	QR(X3, X4, X9, X14, X15)

	DECL DX
	JNZ loop

	// Store interlaced blocks back to output buffer,
	// adding original seed along the way.

	// First the top and bottom rows.
	MOVOU X0, (0*16)(BX)
	MOVOU X1, (1*16)(BX)
	MOVOU X2, (2*16)(BX)
	MOVOU X3, (3*16)(BX)
	MOVOU X12, (12*16)(BX)
	MOVOU X13, (13*16)(BX)
	MOVOU X14, (14*16)(BX)
	// X15 has already been stored.

	// Now we have X0-X3, X12-X15 available for temporaries.
	// Add seed rows back to output. We left seed in DI, SI, R8..R13 above.
	REPLREG(DI, X0)
	REPLREG(SI, X1)
	REPLREG(R8, X2)
	REPLREG(R9, X3)
	REPLREG(R10, X12)
	REPLREG(R11, X13)
	REPLREG(R12, X14)
	REPLREG(R13, X15)
	PADDD X0, X4
	PADDD X1, X5
	PADDD X2, X6
	PADDD X3, X7
	PADDD X12, X8
	PADDD X13, X9
	PADDD X14, X10
	PADDD X15, X11
	MOVOU X4, (4*16)(BX)
	MOVOU X5, (5*16)(BX)
	MOVOU X6, (6*16)(BX)
	MOVOU X7, (7*16)(BX)
	MOVOU X8, (8*16)(BX)
	MOVOU X9, (9*16)(BX)
	MOVOU X10, (10*16)(BX)
	MOVOU X11, (11*16)(BX)

	MOVL $0, AX
	MOVQ AX, X15 // must be 0 on return
	RET

// rotate left 16 indexes for PSHUFB
GLOBL ·rol16<>(SB), NOPTR|RODATA, $16
DATA ·rol16<>+0(SB)/8, $0x0504070601000302
DATA ·rol16<>+8(SB)/8, $0x0D0C0F0E09080B0A

// rotate left 8 indexes for PSHUFB
GLOBL ·rol8<>(SB), NOPTR|RODATA, $16
DATA ·rol8<>+0(SB)/8, $0x0605040702010003
DATA ·rol8<>+8(SB)/8, $0x0E0D0C0F0A09080B

View File

@@ -0,0 +1,104 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "textflag.h"
// QR is the ChaCha quarter-round on A, B, C, and D.
// V30 is used as a temporary, and V31 is assumed to
// hold the index table for rotate left 8.
// Note: every continued line must end with ';' before the
// backslash so the expanded macro parses as separate instructions
// (the second line below was missing it).
#define QR(A, B, C, D) \
	VADD A.S4, B.S4, A.S4; VEOR D.B16, A.B16, D.B16; VREV32 D.H8, D.H8; \
	VADD C.S4, D.S4, C.S4; VEOR B.B16, C.B16, V30.B16; VSHL $12, V30.S4, B.S4; VSRI $20, V30.S4, B.S4; \
	VADD A.S4, B.S4, A.S4; VEOR D.B16, A.B16, D.B16; VTBL V31.B16, [D.B16], D.B16; \
	VADD C.S4, D.S4, C.S4; VEOR B.B16, C.B16, V30.B16; VSHL $7, V30.S4, B.S4; VSRI $25, V30.S4, B.S4
// block runs 4 ChaCha8 block transformations in the four stripes of the V registers.
// func block(seed *[8]uint32, blocks *[4][16]uint32, counter uint32)
TEXT ·block<ABIInternal>(SB), NOSPLIT, $16
	// seed in R0
	// blocks in R1
	// counter in R2

	// Load initial constants into top row.
	MOVD $·chachaConst(SB), R10
	VLD4R (R10), [V0.S4, V1.S4, V2.S4, V3.S4]

	// Load increment and rotate 8 constants into V30, V31.
	MOVD $·chachaIncRot(SB), R11
	VLD1 (R11), [V30.S4, V31.S4]

	// Seed rows, each word replicated across its 4 stripes.
	VLD4R.P 16(R0), [V4.S4, V5.S4, V6.S4, V7.S4]
	VLD4R.P 16(R0), [V8.S4, V9.S4, V10.S4, V11.S4]

	// store counter to memory to replicate its uint32 halfs back out
	MOVW R2, 0(RSP)
	VLD1R 0(RSP), [V12.S4]

	// Add 0, 1, 2, 3 to counter stripes.
	VADD V30.S4, V12.S4, V12.S4

	// Zeros for remaining two matrix entries.
	VEOR V13.B16, V13.B16, V13.B16
	VEOR V14.B16, V14.B16, V14.B16
	VEOR V15.B16, V15.B16, V15.B16

	// Save seed state for adding back later.
	VMOV V4.B16, V20.B16
	VMOV V5.B16, V21.B16
	VMOV V6.B16, V22.B16
	VMOV V7.B16, V23.B16
	VMOV V8.B16, V24.B16
	VMOV V9.B16, V25.B16
	VMOV V10.B16, V26.B16
	VMOV V11.B16, V27.B16

	// 4 iterations. Each iteration is 8 quarter-rounds.
	MOVD $4, R0
loop:
	QR(V0, V4, V8, V12)
	QR(V1, V5, V9, V13)
	QR(V2, V6, V10, V14)
	QR(V3, V7, V11, V15)

	QR(V0, V5, V10, V15)
	QR(V1, V6, V11, V12)
	QR(V2, V7, V8, V13)
	QR(V3, V4, V9, V14)

	SUB $1, R0
	CBNZ R0, loop

	// Add seed back.
	VADD V4.S4, V20.S4, V4.S4
	VADD V5.S4, V21.S4, V5.S4
	VADD V6.S4, V22.S4, V6.S4
	VADD V7.S4, V23.S4, V7.S4
	VADD V8.S4, V24.S4, V8.S4
	VADD V9.S4, V25.S4, V9.S4
	VADD V10.S4, V26.S4, V10.S4
	VADD V11.S4, V27.S4, V11.S4

	// Store interlaced blocks back to output buffer.
	VST1.P [ V0.B16, V1.B16, V2.B16, V3.B16], 64(R1)
	VST1.P [ V4.B16, V5.B16, V6.B16, V7.B16], 64(R1)
	VST1.P [ V8.B16, V9.B16, V10.B16, V11.B16], 64(R1)
	VST1.P [V12.B16, V13.B16, V14.B16, V15.B16], 64(R1)
	RET

// ChaCha matrix constants, same as ChaCha20 ("expand 32-byte k").
GLOBL ·chachaConst(SB), NOPTR|RODATA, $32
DATA ·chachaConst+0x00(SB)/4, $0x61707865
DATA ·chachaConst+0x04(SB)/4, $0x3320646e
DATA ·chachaConst+0x08(SB)/4, $0x79622d32
DATA ·chachaConst+0x0c(SB)/4, $0x6b206574

// First 16 bytes: per-stripe counter increments 0..3 (loaded into V30).
// Last 16 bytes: byte-shuffle index table for rotate-left-8 (loaded into V31).
GLOBL ·chachaIncRot(SB), NOPTR|RODATA, $32
DATA ·chachaIncRot+0x00(SB)/4, $0x00000000
DATA ·chachaIncRot+0x04(SB)/4, $0x00000001
DATA ·chachaIncRot+0x08(SB)/4, $0x00000002
DATA ·chachaIncRot+0x0c(SB)/4, $0x00000003
DATA ·chachaIncRot+0x10(SB)/4, $0x02010003
DATA ·chachaIncRot+0x14(SB)/4, $0x06050407
DATA ·chachaIncRot+0x18(SB)/4, $0x0A09080B
DATA ·chachaIncRot+0x1c(SB)/4, $0x0E0D0C0F

View File

@@ -0,0 +1,235 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// ChaCha8 is ChaCha with 8 rounds.
// See https://cr.yp.to/chacha/chacha-20080128.pdf.
//
// ChaCha8 operates on a 4x4 matrix of uint32 values, initially set to:
//
// const1 const2 const3 const4
// seed seed seed seed
// seed seed seed seed
// counter64 0 0
//
// We use the same constants as ChaCha20 does, a random seed,
// and a counter. Running ChaCha8 on this input produces
// a 4x4 matrix of pseudo-random values with as much entropy
// as the seed.
//
// Given SIMD registers that can hold N uint32s, it is possible
// to run N ChaCha8 block transformations in parallel by filling
// the first register with the N copies of const1, the second
// with N copies of const2, and so on, and then running the operations.
//
// Each iteration of ChaCha8Rand operates over 32 bytes of input and
// produces 992 bytes of RNG output, plus 32 bytes of input for the next
// iteration.
//
// The 32 bytes of input are used as a ChaCha8 key, with a zero nonce, to
// produce 1024 bytes of output (16 blocks, with counters 0 to 15).
// First, for each block, the values 0x61707865, 0x3320646e, 0x79622d32,
// 0x6b206574 are subtracted from the 32-bit little-endian words at
// position 0, 1, 2, and 3 respectively, and an increasing counter
// starting at zero is subtracted from each word at position 12. Then,
// this stream is permuted such that for each sequence of four blocks,
// first we output the first four bytes of each block, then the next four
// bytes of each block, and so on. Finally, the last 32 bytes of output
// are used as the input of the next iteration, and the remaining 992
// bytes are the RNG output.
//
// See https://c2sp.org/chacha8rand for additional details.
//
// Normal ChaCha20 implementations for encryption use this same
// parallelism but then have to deinterlace the results so that
// it appears the blocks were generated separately. For the purposes
// of generating random numbers, the interlacing is fine.
// We are simply locked in to preserving the 4-way interlacing
// in any future optimizations.
package chacha8rand
import (
"internal/goarch"
"unsafe"
)
// setup sets up 4 ChaCha8 blocks in b32 with the counter and seed.
// Note that b32 is [16][4]uint32 not [4][16]uint32: the blocks are interlaced
// the same way they would be in a 4-way SIMD implementations.
func setup(seed *[4]uint64, b32 *[16][4]uint32, counter uint32) {
	// Convert to uint64 to do half as many stores to memory.
	b := (*[16][2]uint64)(unsafe.Pointer(b32))

	// Constants; same as in ChaCha20: "expand 32-byte k"
	b[0][0] = 0x61707865_61707865
	b[0][1] = 0x61707865_61707865
	b[1][0] = 0x3320646e_3320646e
	b[1][1] = 0x3320646e_3320646e
	b[2][0] = 0x79622d32_79622d32
	b[2][1] = 0x79622d32_79622d32
	b[3][0] = 0x6b206574_6b206574
	b[3][1] = 0x6b206574_6b206574

	// Seed values. Each 32-bit half of each seed word is replicated
	// across all 4 stripes of its row (two stripes per uint64 store).
	var x64 uint64
	var x uint32
	x = uint32(seed[0])
	x64 = uint64(x)<<32 | uint64(x)
	b[4][0] = x64
	b[4][1] = x64
	x = uint32(seed[0] >> 32)
	x64 = uint64(x)<<32 | uint64(x)
	b[5][0] = x64
	b[5][1] = x64
	x = uint32(seed[1])
	x64 = uint64(x)<<32 | uint64(x)
	b[6][0] = x64
	b[6][1] = x64
	x = uint32(seed[1] >> 32)
	x64 = uint64(x)<<32 | uint64(x)
	b[7][0] = x64
	b[7][1] = x64
	x = uint32(seed[2])
	x64 = uint64(x)<<32 | uint64(x)
	b[8][0] = x64
	b[8][1] = x64
	x = uint32(seed[2] >> 32)
	x64 = uint64(x)<<32 | uint64(x)
	b[9][0] = x64
	b[9][1] = x64
	x = uint32(seed[3])
	x64 = uint64(x)<<32 | uint64(x)
	b[10][0] = x64
	b[10][1] = x64
	x = uint32(seed[3] >> 32)
	x64 = uint64(x)<<32 | uint64(x)
	b[11][0] = x64
	b[11][1] = x64

	// Counters. Each stripe gets a different counter: counter+0..counter+3.
	if goarch.BigEndian {
		b[12][0] = uint64(counter+0)<<32 | uint64(counter+1)
		b[12][1] = uint64(counter+2)<<32 | uint64(counter+3)
	} else {
		b[12][0] = uint64(counter+0) | uint64(counter+1)<<32
		b[12][1] = uint64(counter+2) | uint64(counter+3)<<32
	}

	// Zeros.
	b[13][0] = 0
	b[13][1] = 0
	b[14][0] = 0
	b[14][1] = 0
	b[15][0] = 0
	b[15][1] = 0
}
// _ statically asserts that block and block_generic have identical
// signatures; it compiles to nothing and is never called.
func _() {
	// block and block_generic must have same type
	x := block
	x = block_generic
	_ = x
}
// block_generic is the non-assembly block implementation,
// for use on systems without special assembly.
// Even on such systems, it is quite fast: on GOOS=386,
// ChaCha8 using this code generates random values faster than PCG-DXSM.
func block_generic(seed *[4]uint64, buf *[32]uint64, counter uint32) {
	// View buf as the interlaced [16][4]uint32 matrix that setup fills.
	b := (*[16][4]uint32)(unsafe.Pointer(buf))

	setup(seed, b, counter)

	// Process each of the 4 interlaced stripes as an independent block.
	for i := range b[0] {
		// Load block i from b[*][i] into local variables.
		b0 := b[0][i]
		b1 := b[1][i]
		b2 := b[2][i]
		b3 := b[3][i]
		b4 := b[4][i]
		b5 := b[5][i]
		b6 := b[6][i]
		b7 := b[7][i]
		b8 := b[8][i]
		b9 := b[9][i]
		b10 := b[10][i]
		b11 := b[11][i]
		b12 := b[12][i]
		b13 := b[13][i]
		b14 := b[14][i]
		b15 := b[15][i]

		// 4 iterations of eight quarter-rounds each is 8 rounds
		for round := 0; round < 4; round++ {
			b0, b4, b8, b12 = qr(b0, b4, b8, b12)
			b1, b5, b9, b13 = qr(b1, b5, b9, b13)
			b2, b6, b10, b14 = qr(b2, b6, b10, b14)
			b3, b7, b11, b15 = qr(b3, b7, b11, b15)

			b0, b5, b10, b15 = qr(b0, b5, b10, b15)
			b1, b6, b11, b12 = qr(b1, b6, b11, b12)
			b2, b7, b8, b13 = qr(b2, b7, b8, b13)
			b3, b4, b9, b14 = qr(b3, b4, b9, b14)
		}

		// Store block i back into b[*][i].
		// Add b4..b11 back to the original key material,
		// like in ChaCha20, to avoid trivial invertibility.
		// There is no entropy in b0..b3 and b12..b15
		// so we can skip the additions and save some time.
		b[0][i] = b0
		b[1][i] = b1
		b[2][i] = b2
		b[3][i] = b3
		b[4][i] += b4
		b[5][i] += b5
		b[6][i] += b6
		b[7][i] += b7
		b[8][i] += b8
		b[9][i] += b9
		b[10][i] += b10
		b[11][i] += b11
		b[12][i] = b12
		b[13][i] = b13
		b[14][i] = b14
		b[15][i] = b15
	}

	if goarch.BigEndian {
		// On a big-endian system, reading the uint32 pairs as uint64s
		// will word-swap them compared to little-endian, so we word-swap
		// them here first to make the next swap get the right answer.
		for i, x := range buf {
			buf[i] = x>>32 | x<<32
		}
	}
}
// qr is the (inlinable) ChaCha8 quarter round.
// It mixes the four state words with add-xor-rotate steps,
// rotating by 16, 12, 8, and 7 bits in turn.
func qr(a, b, c, d uint32) (_a, _b, _c, _d uint32) {
	// First half: rotate d by 16, then b by 12.
	a += b
	x := d ^ a
	d = x<<16 | x>>16
	c += d
	y := b ^ c
	b = y<<12 | y>>20
	// Second half: rotate d by 8, then b by 7.
	a += b
	z := d ^ a
	d = z<<8 | z>>24
	c += d
	w := b ^ c
	b = w<<7 | w>>25
	return a, b, c, d
}

View File

@@ -0,0 +1,12 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build !amd64 && !arm64
#include "textflag.h"
// func block(seed *[4]uint64, blocks *[32]uint64, counter uint32)
//
// NOTE(review): the previous declaration comment here read
// (counter uint64, seed *[8]uint32, blocks *[16][4]uint32), which does
// not match the Go declaration of block used by this package; corrected
// to agree with the Go-side signature.
// No vector implementation exists for this architecture, so the stub
// tail-jumps to the portable Go version, reusing the caller's frame.
TEXT ·block(SB), NOSPLIT, $0
	JMP	·block_generic(SB)

View File

@@ -0,0 +1,12 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package chacha8rand

// Block and Block_generic export the internal block implementations
// for use by the external tests (package chacha8rand_test).
var Block = block
var Block_generic = block_generic

// Seed returns s's current seed, letting tests observe reseeding.
func Seed(s *State) [4]uint64 {
	return s.seed
}

View File

@@ -0,0 +1,202 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package chacha8rand_test
import (
"bytes"
"encoding/binary"
"fmt"
. "internal/chacha8rand"
"slices"
"testing"
)
// TestOutput checks the generator's stream against the golden output
// vector, refilling the state whenever it runs dry.
func TestOutput(t *testing.T) {
	var s State
	s.Init(seed)
	for i, want := range output {
		// Refill until the state can yield the next value.
		v, ok := s.Next()
		for !ok {
			s.Refill()
			v, ok = s.Next()
		}
		if v != want {
			t.Errorf("#%d: have %#x want %#x", i, v, want)
		}
	}
}
// TestMarshal round-trips the state through Marshal/Unmarshal before
// every value and checks the stream still matches the golden output.
func TestMarshal(t *testing.T) {
	var s State
	s.Init(seed)
	for i, want := range output {
		for {
			// Serialize, wipe, and restore the state each iteration.
			data := Marshal(&s)
			s = State{}
			if err := Unmarshal(&s, data); err != nil {
				t.Fatalf("#%d: Unmarshal: %v", i, err)
			}
			v, ok := s.Next()
			if !ok {
				s.Refill()
				continue
			}
			if v != want {
				t.Fatalf("#%d: have %#x want %#x", i, v, want)
			}
			break
		}
	}
}
// TestReseed verifies that Reseed replaces the stored seed.
func TestReseed(t *testing.T) {
	var s State
	s.Init(seed)
	before := Seed(&s)
	s.Reseed()
	if after := Seed(&s); after == before {
		t.Errorf("Reseed did not change seed")
	}
}
// BenchmarkBlock measures the raw block function on a zero seed,
// reporting throughput in bytes (32 uint64s per call).
func BenchmarkBlock(b *testing.B) {
	var (
		seed   [4]uint64
		blocks [32]uint64
	)
	b.SetBytes(32 * 8)
	for i := 0; i < b.N; i++ {
		Block(&seed, &blocks, 0)
	}
}
// TestBlockGeneric checks that the (possibly assembly) block and the
// pure-Go block_generic produce identical output for the test seed,
// printing a word-by-word comparison on mismatch.
func TestBlockGeneric(t *testing.T) {
	var want, got [32]uint64
	raw := seed // byte seed
	key := [4]uint64{
		binary.LittleEndian.Uint64(raw[0*8:]),
		binary.LittleEndian.Uint64(raw[1*8:]),
		binary.LittleEndian.Uint64(raw[2*8:]),
		binary.LittleEndian.Uint64(raw[3*8:]),
	}

	Block(&key, &want, 4)
	Block_generic(&key, &got, 4)
	if slices.Equal(want[:], got[:]) {
		return
	}

	// Build a side-by-side dump marking the differing words.
	var report bytes.Buffer
	fmt.Fprintf(&report, "%-18s %-18s\n", "block", "block_generic")
	for i := range want {
		note := ""
		if want[i] != got[i] {
			note = " mismatch!"
		}
		fmt.Fprintf(&report, "%#016x %#016x%s\n", want[i], got[i], note)
	}
	t.Errorf("block and block_generic disagree:\n%s", report.String())
}
// Golden output test to make sure algorithm never changes,
// so that its use in math/rand/v2 stays stable.
// See https://c2sp.org/chacha8rand.
var seed = [32]byte([]byte("ABCDEFGHIJKLMNOPQRSTUVWXYZ123456"))
var output = []uint64{
0xb773b6063d4616a5, 0x1160af22a66abc3c, 0x8c2599d9418d287c, 0x7ee07e037edc5cd6,
0xcfaa9ee02d1c16ad, 0x0e090eef8febea79, 0x3c82d271128b5b3e, 0x9c5addc11252a34f,
0xdf79bb617d6ceea6, 0x36d553591f9d736a, 0xeef0d14e181ee01f, 0x089bfc760ae58436,
0xd9e52b59cc2ad268, 0xeb2fb4444b1b8aba, 0x4f95c8a692c46661, 0xc3c6323217cae62c,
0x91ebb4367f4e2e7e, 0x784cf2c6a0ec9bc6, 0x5c34ec5c34eabe20, 0x4f0a8f515570daa8,
0xfc35dcb4113d6bf2, 0x5b0da44c645554bc, 0x6d963da3db21d9e1, 0xeeaefc3150e500f3,
0x2d37923dda3750a5, 0x380d7a626d4bc8b0, 0xeeaf68ede3d7ee49, 0xf4356695883b717c,
0x846a9021392495a4, 0x8e8510549630a61b, 0x18dc02545dbae493, 0x0f8f9ff0a65a3d43,
0xccf065f7190ff080, 0xfd76d1aa39673330, 0x95d232936cba6433, 0x6c7456d1070cbd17,
0x462acfdaff8c6562, 0x5bafab866d34fc6a, 0x0c862f78030a2988, 0xd39a83e407c3163d,
0xc00a2b7b45f22ebf, 0x564307c62466b1a9, 0x257e0424b0c072d4, 0x6fb55e99496c28fe,
0xae9873a88f5cd4e0, 0x4657362ac60d3773, 0x1c83f91ecdf23e8e, 0x6fdc0792c15387c0,
0x36dad2a30dfd2b5c, 0xa4b593290595bdb7, 0x4de18934e4cc02c5, 0xcdc0d604f015e3a7,
0xfba0dbf69ad80321, 0x60e8bea3d139de87, 0xd18a4d851ef48756, 0x6366447c2215f34a,
0x05682e97d3d007ee, 0x4c0e8978c6d54ab2, 0xcf1e9f6a6712edc2, 0x061439414c80cfd3,
0xd1a8b6e2745c0ead, 0x31a7918d45c410e8, 0xabcc61ad90216eec, 0x4040d92d2032a71a,
0x3cd2f66ffb40cd68, 0xdcd051c07295857a, 0xeab55cbcd9ab527e, 0x18471dce781bdaac,
0xf7f08cd144dc7252, 0x5804e0b13d7f40d1, 0x5cb1a446e4b2d35b, 0xe6d4a728d2138a06,
0x05223e40ca60dad8, 0x2d61ec3206ac6a68, 0xab692356874c17b8, 0xc30954417676de1c,
0x4f1ace3732225624, 0xfba9510813988338, 0x997f200f52752e11, 0x1116aaafe86221fa,
0x07ce3b5cb2a13519, 0x2956bc72bc458314, 0x4188b7926140eb78, 0x56ca6dbfd4adea4d,
0x7fe3c22349340ce5, 0x35c08f9c37675f8a, 0x11e1c7fbef5ed521, 0x98adc8464ec1bc75,
0xd163b2c73d1203f8, 0x8c761ee043a2f3f3, 0x24b99d6accecd7b7, 0x793e31aa112f0370,
0x8e87dc2a19285139, 0x4247ae04f7096e25, 0x514f3122926fe20f, 0xdc6fb3f045d2a7e9,
0x15cb30cecdd18eba, 0xcbc7fdecf6900274, 0x3fb5c696dc8ba021, 0xd1664417c8d274e6,
0x05f7e445ea457278, 0xf920bbca1b9db657, 0x0c1950b4da22cb99, 0xf875baf1af09e292,
0xbed3d7b84250f838, 0xf198e8080fd74160, 0xc9eda51d9b7ea703, 0xf709ef55439bf8f6,
0xd20c74feebf116fc, 0x305668eb146d7546, 0x829af3ec10d89787, 0x15b8f9697b551dbc,
0xfc823c6c8e64b8c9, 0x345585e8183b40bc, 0x674b4171d6581368, 0x1234d81cd670e9f7,
0x0e505210d8a55e19, 0xe8258d69eeeca0dc, 0x05d4c452e8baf67e, 0xe8dbe30116a45599,
0x1cf08ce1b1176f00, 0xccf7d0a4b81ecb49, 0x303fea136b2c430e, 0x861d6c139c06c871,
0x5f41df72e05e0487, 0x25bd7e1e1ae26b1d, 0xbe9f4004d662a41d, 0x65bf58d483188546,
0xd1b27cff69db13cc, 0x01a6663372c1bb36, 0x578dd7577b727f4d, 0x19c78f066c083cf6,
0xdbe014d4f9c391bb, 0x97fbb2dd1d13ffb3, 0x31c91e0af9ef8d4f, 0x094dfc98402a43ba,
0x069bd61bea37b752, 0x5b72d762e8d986ca, 0x72ee31865904bc85, 0xd1f5fdc5cd36c33e,
0xba9b4980a8947cad, 0xece8f05eac49ab43, 0x65fe1184abae38e7, 0x2d7cb9dea5d31452,
0xcc71489476e467e3, 0x4c03a258a578c68c, 0x00efdf9ecb0fd8fc, 0x9924cad471e2666d,
0x87f8668318f765e9, 0xcb4dc57c1b55f5d8, 0xd373835a86604859, 0xe526568b5540e482,
0x1f39040f08586fec, 0xb764f3f00293f8e6, 0x049443a2f6bd50a8, 0x76fec88697d3941a,
0x3efb70d039bae7a2, 0xe2f4611368eca8a8, 0x7c007a96e01d2425, 0xbbcce5768e69c5bf,
0x784fb4985c42aac3, 0xf72b5091aa223874, 0x3630333fb1e62e07, 0x8e7319ebdebbb8de,
0x2a3982bca959fa00, 0xb2b98b9f964ba9b3, 0xf7e31014adb71951, 0xebd0fca3703acc82,
0xec654e2a2fe6419a, 0xb326132d55a52e2c, 0x2248c57f44502978, 0x32710c2f342daf16,
0x0517b47b5acb2bec, 0x4c7a718fca270937, 0xd69142bed0bcc541, 0xe40ebcb8ff52ce88,
0x3e44a2dbc9f828d4, 0xc74c2f4f8f873f58, 0x3dbf648eb799e45b, 0x33f22475ee0e86f8,
0x1eb4f9ee16d47f65, 0x40f8d2b8712744e3, 0xb886b4da3cb14572, 0x2086326fbdd6f64d,
0xcc3de5907dd882b9, 0xa2e8b49a5ee909df, 0xdbfb8e7823964c10, 0x70dd6089ef0df8d5,
0x30141663cdd9c99f, 0x04b805325c240365, 0x7483d80314ac12d6, 0x2b271cb91aa7f5f9,
0x97e2245362abddf0, 0x5a84f614232a9fab, 0xf71125fcda4b7fa2, 0x1ca5a61d74b27267,
0x38cc6a9b3adbcb45, 0xdde1bb85dc653e39, 0xe9d0c8fa64f89fd4, 0x02c5fb1ecd2b4188,
0xf2bd137bca5756e5, 0xadefe25d121be155, 0x56cd1c3c5d893a8e, 0x4c50d337beb65bb9,
0x918c5151675cf567, 0xaba649ffcfb56a1e, 0x20c74ab26a2247cd, 0x71166bac853c08da,
0xb07befe2e584fc5d, 0xda45ff2a588dbf32, 0xdb98b03c4d75095e, 0x60285ae1aaa65a4c,
0xf93b686a263140b8, 0xde469752ee1c180e, 0xcec232dc04129aae, 0xeb916baa1835ea04,
0xd49c21c8b64388ff, 0x72a82d9658864888, 0x003348ef7eac66a8, 0x7f6f67e655b209eb,
0x532ffb0b7a941b25, 0xd940ade6128deede, 0xdf24f2a1af89fe23, 0x95aa3b4988195ae0,
0x3da649404f94be4a, 0x692dad132c3f7e27, 0x40aee76ecaaa9eb8, 0x1294a01e09655024,
0x6df797abdba4e4f5, 0xea2fb6024c1d7032, 0x5f4e0492295489fc, 0x57972914ea22e06a,
0x9a8137d133aad473, 0xa2e6dd6ae7cdf2f3, 0x9f42644f18086647, 0x16d03301c170bd3e,
0x908c416fa546656d, 0xe081503be22e123e, 0x077cf09116c4cc72, 0xcbd25cd264b7f229,
0x3db2f468ec594031, 0x46c00e734c9badd5, 0xd0ec0ac72075d861, 0x3037cb3cf80b7630,
0x574c3d7b3a2721c6, 0xae99906a0076824b, 0xb175a5418b532e70, 0xd8b3e251ee231ddd,
0xb433eec25dca1966, 0x530f30dc5cff9a93, 0x9ff03d98b53cd335, 0xafc4225076558cdf,
0xef81d3a28284402a, 0x110bdbf51c110a28, 0x9ae1b255d027e8f6, 0x7de3e0aa24688332,
0xe483c3ecd2067ee2, 0xf829328b276137e6, 0xa413ccad57562cad, 0xe6118e8b496acb1f,
0x8288dca6da5ec01f, 0xa53777dc88c17255, 0x8a00f1e0d5716eda, 0x618e6f47b7a720a8,
0x9e3907b0c692a841, 0x978b42ca963f34f3, 0x75e4b0cd98a7d7ef, 0xde4dbd6e0b5f4752,
0x0252e4153f34493f, 0x50f0e7d803734ef9, 0x237766a38ed167ee, 0x4124414001ee39a0,
0xd08df643e535bb21, 0x34f575b5a9a80b74, 0x2c343af87297f755, 0xcd8b6d99d821f7cb,
0xe376fd7256fc48ae, 0xe1b06e7334352885, 0xfa87b26f86c169eb, 0x36c1604665a971de,
0xdba147c2239c8e80, 0x6b208e69fc7f0e24, 0x8795395b6f2b60c3, 0x05dabee9194907f4,
0xb98175142f5ed902, 0x5e1701e2021ddc81, 0x0875aba2755eed08, 0x778d83289251de95,
0x3bfbe46a039ecb31, 0xb24704fce4cbd7f9, 0x6985ffe9a7c91e3d, 0xc8efb13df249dabb,
0xb1037e64b0f4c9f6, 0x55f69fd197d6b7c3, 0x672589d71d68a90c, 0xbebdb8224f50a77e,
0x3f589f80007374a7, 0xd307f4635954182a, 0xcff5850c10d4fd90, 0xc6da02dfb6408e15,
0x93daeef1e2b1a485, 0x65d833208aeea625, 0xe2b13fa13ed3b5fa, 0x67053538130fb68e,
0xc1042f6598218fa9, 0xee5badca749b8a2e, 0x6d22a3f947dae37d, 0xb62c6d1657f4dbaf,
0x6e007de69704c20b, 0x1af2b913fc3841d8, 0xdc0e47348e2e8e22, 0x9b1ddef1cf958b22,
0x632ed6b0233066b8, 0xddd02d3311bed8f2, 0xf147cfe1834656e9, 0x399aaa49d511597a,
0x6b14886979ec0309, 0x64fc4ac36b5afb97, 0xb82f78e07f7cf081, 0x10925c9a323d0e1b,
0xf451c79ee13c63f6, 0x7c2fc180317876c7, 0x35a12bd9eecb7d22, 0x335654a539621f90,
0xcc32a3f35db581f0, 0xc60748a80b2369cb, 0x7c4dd3b08591156b, 0xac1ced4b6de22291,
0xa32cfa2df134def5, 0x627108918dea2a53, 0x0555b1608fcb4ff4, 0x143ee7ac43aaa33c,
0xdae90ce7cf4fc218, 0x4d68fc2582bcf4b5, 0x37094e1849135d71, 0xf7857e09f3d49fd8,
0x007538c503768be7, 0xedf648ba2f6be601, 0xaa347664dd72513e, 0xbe63893c6ef23b86,
0x130b85710605af97, 0xdd765c6b1ef6ab56, 0xf3249a629a97dc6b, 0x2a114f9020fab8e5,
0x5a69e027cfc6ad08, 0x3c4ccb36f1a5e050, 0x2e9e7d596834f0a5, 0x2430be6858fce789,
0xe90b862f2466e597, 0x895e2884f159a9ec, 0x26ab8fa4902fcb57, 0xa6efff5c54e1fa50,
0x333ac4e5811a8255, 0xa58d515f02498611, 0xfe5a09dcb25c6ef4, 0x03898988ab5f5818,
0x289ff6242af6c617, 0x3d9dd59fd381ea23, 0x52d7d93d8a8aae51, 0xc76a123d511f786f,
0xf68901edaf00c46c, 0x8c630871b590de80, 0x05209c308991e091, 0x1f809f99b4788177,
0x11170c2eb6c19fd8, 0x44433c779062ba58, 0xc0acb51af1874c45, 0x9f2e134284809fa1,
0xedb523bd15c619fa, 0x02d97fd53ecc23c0, 0xacaf05a34462374c, 0xddd9c6d34bffa11f,
}

Some files were not shown because too many files have changed in this diff Show More