Initial commit: Go 1.23 release state
This commit is contained in:
102
src/internal/abi/abi.go
Normal file
102
src/internal/abi/abi.go
Normal file
@@ -0,0 +1,102 @@
|
||||
// Copyright 2020 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package abi
|
||||
|
||||
import (
|
||||
"internal/goarch"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
// RegArgs is a struct that has space for each argument
|
||||
// and return value register on the current architecture.
|
||||
//
|
||||
// Assembly code knows the layout of the first two fields
|
||||
// of RegArgs.
|
||||
//
|
||||
// RegArgs also contains additional space to hold pointers
|
||||
// when it may not be safe to keep them only in the integer
|
||||
// register space otherwise.
|
||||
type RegArgs struct {
|
||||
// Values in these slots should be precisely the bit-by-bit
|
||||
// representation of how they would appear in a register.
|
||||
//
|
||||
// This means that on big endian arches, integer values should
|
||||
// be in the top bits of the slot. Floats are usually just
|
||||
// directly represented, but some architectures treat narrow
|
||||
// width floating point values specially (e.g. they're promoted
|
||||
// first, or they need to be NaN-boxed).
|
||||
Ints [IntArgRegs]uintptr // untyped integer registers
|
||||
Floats [FloatArgRegs]uint64 // untyped float registers
|
||||
|
||||
// Fields above this point are known to assembly.
|
||||
|
||||
// Ptrs is a space that duplicates Ints but with pointer type,
|
||||
// used to make pointers passed or returned in registers
|
||||
// visible to the GC by making the type unsafe.Pointer.
|
||||
Ptrs [IntArgRegs]unsafe.Pointer
|
||||
|
||||
// ReturnIsPtr is a bitmap that indicates which registers
|
||||
// contain or will contain pointers on the return path from
|
||||
// a reflectcall. The i'th bit indicates whether the i'th
|
||||
// register contains or will contain a valid Go pointer.
|
||||
ReturnIsPtr IntArgRegBitmap
|
||||
}
|
||||
|
||||
func (r *RegArgs) Dump() {
|
||||
print("Ints:")
|
||||
for _, x := range r.Ints {
|
||||
print(" ", x)
|
||||
}
|
||||
println()
|
||||
print("Floats:")
|
||||
for _, x := range r.Floats {
|
||||
print(" ", x)
|
||||
}
|
||||
println()
|
||||
print("Ptrs:")
|
||||
for _, x := range r.Ptrs {
|
||||
print(" ", x)
|
||||
}
|
||||
println()
|
||||
}
|
||||
|
||||
// IntRegArgAddr returns a pointer inside of r.Ints[reg] that is appropriately
|
||||
// offset for an argument of size argSize.
|
||||
//
|
||||
// argSize must be non-zero, fit in a register, and a power-of-two.
|
||||
//
|
||||
// This method is a helper for dealing with the endianness of different CPU
|
||||
// architectures, since sub-word-sized arguments in big endian architectures
|
||||
// need to be "aligned" to the upper edge of the register to be interpreted
|
||||
// by the CPU correctly.
|
||||
func (r *RegArgs) IntRegArgAddr(reg int, argSize uintptr) unsafe.Pointer {
|
||||
if argSize > goarch.PtrSize || argSize == 0 || argSize&(argSize-1) != 0 {
|
||||
panic("invalid argSize")
|
||||
}
|
||||
offset := uintptr(0)
|
||||
if goarch.BigEndian {
|
||||
offset = goarch.PtrSize - argSize
|
||||
}
|
||||
return unsafe.Pointer(uintptr(unsafe.Pointer(&r.Ints[reg])) + offset)
|
||||
}
|
||||
|
||||
// IntArgRegBitmap is a bitmap large enough to hold one bit per
|
||||
// integer argument/return register.
|
||||
type IntArgRegBitmap [(IntArgRegs + 7) / 8]uint8
|
||||
|
||||
// Set sets the i'th bit of the bitmap to 1.
|
||||
func (b *IntArgRegBitmap) Set(i int) {
|
||||
b[i/8] |= uint8(1) << (i % 8)
|
||||
}
|
||||
|
||||
// Get returns whether the i'th bit of the bitmap is set.
|
||||
//
|
||||
// nosplit because it's called in extremely sensitive contexts, like
|
||||
// on the reflectcall return path.
|
||||
//
|
||||
//go:nosplit
|
||||
func (b *IntArgRegBitmap) Get(i int) bool {
|
||||
return b[i/8]&(uint8(1)<<(i%8)) != 0
|
||||
}
|
||||
18
src/internal/abi/abi_amd64.go
Normal file
18
src/internal/abi/abi_amd64.go
Normal file
@@ -0,0 +1,18 @@
|
||||
// Copyright 2020 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package abi
|
||||
|
||||
const (
	// The meaning of each constant is documented in abi_generic.go.

	// IntArgRegs counts RAX, RBX, RCX, RDI, RSI, R8, R9, R10, R11.
	IntArgRegs = 9

	// FloatArgRegs counts X0 through X14.
	FloatArgRegs = 15

	// EffectiveFloatRegSize is 8: the SSE2 registers in use support
	// 64-bit float operations.
	EffectiveFloatRegSize = 8
)
|
||||
17
src/internal/abi/abi_arm64.go
Normal file
17
src/internal/abi/abi_arm64.go
Normal file
@@ -0,0 +1,17 @@
|
||||
// Copyright 2021 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package abi
|
||||
|
||||
const (
	// The meaning of each constant is documented in abi_generic.go.

	// IntArgRegs counts R0 through R15.
	IntArgRegs = 16

	// FloatArgRegs counts F0 through F15.
	FloatArgRegs = 16

	// EffectiveFloatRegSize is the ABI-visible float register width in bytes.
	EffectiveFloatRegSize = 8
)
|
||||
38
src/internal/abi/abi_generic.go
Normal file
38
src/internal/abi/abi_generic.go
Normal file
@@ -0,0 +1,38 @@
|
||||
// Copyright 2020 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build !goexperiment.regabiargs && !amd64 && !arm64 && !loong64 && !ppc64 && !ppc64le && !riscv64
|
||||
|
||||
package abi
|
||||
|
||||
const (
	// ABI-related constants.
	//
	// The generic values are all zero, which makes the register ABI
	// degrade gracefully to ABI0.

	// IntArgRegs is how many registers are dedicated to passing integer
	// argument values. Result registers are the same as argument
	// registers, so the number applies to results as well.
	IntArgRegs = 0

	// FloatArgRegs is how many registers are dedicated to passing
	// floating-point argument values. Result registers are the same as
	// argument registers, so the number applies to results as well.
	FloatArgRegs = 0

	// EffectiveFloatRegSize is the width of the platform's floating
	// point registers as seen by the ABI.
	//
	// Go only has 32-bit and 64-bit floating point primitives, so the
	// value should be 0, 4, or 8. 0 means the ABI has no floating point
	// registers, or that floating point values are passed via the
	// softfloat ABI.
	//
	// Platforms with wider floating point registers, such as x87's
	// 80-bit "registers" (not that x87 is currently supported), use 8.
	EffectiveFloatRegSize = 0
)
|
||||
17
src/internal/abi/abi_loong64.go
Normal file
17
src/internal/abi/abi_loong64.go
Normal file
@@ -0,0 +1,17 @@
|
||||
// Copyright 2023 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package abi
|
||||
|
||||
const (
	// The meaning of each constant is documented in abi_generic.go.

	// IntArgRegs counts R4 through R19.
	IntArgRegs = 16

	// FloatArgRegs counts F0 through F15.
	FloatArgRegs = 16

	// EffectiveFloatRegSize is the ABI-visible float register width in bytes.
	EffectiveFloatRegSize = 8
)
|
||||
19
src/internal/abi/abi_ppc64x.go
Normal file
19
src/internal/abi/abi_ppc64x.go
Normal file
@@ -0,0 +1,19 @@
|
||||
// Copyright 2021 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build ppc64 || ppc64le
|
||||
|
||||
package abi
|
||||
|
||||
const (
	// The meaning of each constant is documented in abi_generic.go.

	// IntArgRegs counts R3 through R10 and R14 through R17.
	IntArgRegs = 12

	// FloatArgRegs counts F1 through F12.
	FloatArgRegs = 12

	// EffectiveFloatRegSize is the ABI-visible float register width in bytes.
	EffectiveFloatRegSize = 8
)
|
||||
17
src/internal/abi/abi_riscv64.go
Normal file
17
src/internal/abi/abi_riscv64.go
Normal file
@@ -0,0 +1,17 @@
|
||||
// Copyright 2022 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package abi
|
||||
|
||||
const (
	// The meaning of each constant is documented in abi_generic.go.

	// IntArgRegs counts X8 through X23.
	IntArgRegs = 16

	// FloatArgRegs counts F8 through F23.
	FloatArgRegs = 16

	// EffectiveFloatRegSize is the ABI-visible float register width in bytes.
	EffectiveFloatRegSize = 8
)
|
||||
79
src/internal/abi/abi_test.go
Normal file
79
src/internal/abi/abi_test.go
Normal file
@@ -0,0 +1,79 @@
|
||||
// Copyright 2021 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package abi_test
|
||||
|
||||
import (
|
||||
"internal/abi"
|
||||
"internal/testenv"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestFuncPC(t *testing.T) {
|
||||
// Test that FuncPC* can get correct function PC.
|
||||
pcFromAsm := abi.FuncPCTestFnAddr
|
||||
|
||||
// Test FuncPC for locally defined function
|
||||
pcFromGo := abi.FuncPCTest()
|
||||
if pcFromGo != pcFromAsm {
|
||||
t.Errorf("FuncPC returns wrong PC, want %x, got %x", pcFromAsm, pcFromGo)
|
||||
}
|
||||
|
||||
// Test FuncPC for imported function
|
||||
pcFromGo = abi.FuncPCABI0(abi.FuncPCTestFn)
|
||||
if pcFromGo != pcFromAsm {
|
||||
t.Errorf("FuncPC returns wrong PC, want %x, got %x", pcFromAsm, pcFromGo)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFuncPCCompileError(t *testing.T) {
|
||||
// Test that FuncPC* on a function of a mismatched ABI is rejected.
|
||||
testenv.MustHaveGoBuild(t)
|
||||
|
||||
// We want to test internal package, which we cannot normally import.
|
||||
// Run the assembler and compiler manually.
|
||||
tmpdir := t.TempDir()
|
||||
asmSrc := filepath.Join("testdata", "x.s")
|
||||
goSrc := filepath.Join("testdata", "x.go")
|
||||
symabi := filepath.Join(tmpdir, "symabi")
|
||||
obj := filepath.Join(tmpdir, "x.o")
|
||||
|
||||
// Write an importcfg file for the dependencies of the package.
|
||||
importcfgfile := filepath.Join(tmpdir, "hello.importcfg")
|
||||
testenv.WriteImportcfg(t, importcfgfile, nil, "internal/abi")
|
||||
|
||||
// parse assembly code for symabi.
|
||||
cmd := testenv.Command(t, testenv.GoToolPath(t), "tool", "asm", "-p=p", "-gensymabis", "-o", symabi, asmSrc)
|
||||
out, err := cmd.CombinedOutput()
|
||||
if err != nil {
|
||||
t.Fatalf("go tool asm -gensymabis failed: %v\n%s", err, out)
|
||||
}
|
||||
|
||||
// compile go code.
|
||||
cmd = testenv.Command(t, testenv.GoToolPath(t), "tool", "compile", "-importcfg="+importcfgfile, "-p=p", "-symabis", symabi, "-o", obj, goSrc)
|
||||
out, err = cmd.CombinedOutput()
|
||||
if err == nil {
|
||||
t.Fatalf("go tool compile did not fail")
|
||||
}
|
||||
|
||||
// Expect errors in line 17, 18, 20, no errors on other lines.
|
||||
want := []string{"x.go:17", "x.go:18", "x.go:20"}
|
||||
got := strings.Split(string(out), "\n")
|
||||
if got[len(got)-1] == "" {
|
||||
got = got[:len(got)-1] // remove last empty line
|
||||
}
|
||||
for i, s := range got {
|
||||
if !strings.Contains(s, want[i]) {
|
||||
t.Errorf("did not error on line %s", want[i])
|
||||
}
|
||||
}
|
||||
if len(got) != len(want) {
|
||||
t.Errorf("unexpected number of errors, want %d, got %d", len(want), len(got))
|
||||
}
|
||||
if t.Failed() {
|
||||
t.Logf("output:\n%s", string(out))
|
||||
}
|
||||
}
|
||||
27
src/internal/abi/abi_test.s
Normal file
27
src/internal/abi/abi_test.s
Normal file
@@ -0,0 +1,27 @@
|
||||
// Copyright 2021 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
// PTRSIZE is set to 4 for each 32-bit GOARCH tested in this chain of
// #ifdef blocks; the final #ifndef makes every other target use 8.
#ifdef GOARCH_386
|
||||
#define PTRSIZE 4
|
||||
#endif
|
||||
#ifdef GOARCH_arm
|
||||
#define PTRSIZE 4
|
||||
#endif
|
||||
#ifdef GOARCH_mips
|
||||
#define PTRSIZE 4
|
||||
#endif
|
||||
#ifdef GOARCH_mipsle
|
||||
#define PTRSIZE 4
|
||||
#endif
|
||||
#ifndef PTRSIZE
|
||||
#define PTRSIZE 8
|
||||
#endif
|
||||
|
||||
// FuncPCTestFn does nothing but return. Its address is also stored in
// FuncPCTestFnAddr so that Go code can compare against it.
TEXT internal∕abi·FuncPCTestFn(SB),NOSPLIT,$0-0
|
||||
RET
|
||||
|
||||
// FuncPCTestFnAddr is a PTRSIZE-byte NOPTR global whose initial value
// is the address of FuncPCTestFn.
GLOBL internal∕abi·FuncPCTestFnAddr(SB), NOPTR, $PTRSIZE
|
||||
DATA internal∕abi·FuncPCTestFnAddr(SB)/PTRSIZE, $internal∕abi·FuncPCTestFn(SB)
|
||||
28
src/internal/abi/compiletype.go
Normal file
28
src/internal/abi/compiletype.go
Normal file
@@ -0,0 +1,28 @@
|
||||
// Copyright 2023 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package abi
|
||||
|
||||
// These functions are the build-time version of the Go type data structures.
|
||||
|
||||
// Their contents must be kept in sync with their definitions.
|
||||
// Because the host and target type sizes can differ, the compiler and
|
||||
// linker cannot use the host information that they might get from
|
||||
// either unsafe.Sizeof and Alignof, nor runtime, reflect, or reflectlite.
|
||||
|
||||
// CommonSize returns sizeof(Type) for a compilation target with a given ptrSize
|
||||
func CommonSize(ptrSize int) int {
	// Four pointer-sized words plus two 8-byte groups of fixed-width fields.
	const fixedBytes = 8 + 8
	return 4*ptrSize + fixedBytes
}
|
||||
|
||||
// StructFieldSize returns sizeof(StructField) for a compilation target with a given ptrSize
|
||||
func StructFieldSize(ptrSize int) int {
	// NOTE(review): presumably StructField is three pointer-sized words;
	// its definition is not visible here, confirm before changing.
	const words = 3
	return words * ptrSize
}
|
||||
|
||||
// UncommonSize returns sizeof(UncommonType). This currently does not depend on ptrSize.
|
||||
// This exported function is in an internal package, so it may change to depend on ptrSize in the future.
|
||||
func UncommonSize() uint64 {
	// Sum of the fixed-width pieces: 4 + 2 + 2 + 4 + 4 bytes.
	const size = 4 + 2 + 2 + 4 + 4
	return size
}
|
||||
|
||||
// TFlagOff returns the offset of Type.TFlag for a compilation target with a given ptrSize
|
||||
func TFlagOff(ptrSize int) int {
	// TFlag follows two pointer-sized fields (Size_, PtrBytes) and the
	// 4-byte Hash field.
	const hashBytes = 4
	return 2*ptrSize + hashBytes
}
|
||||
|
||||
// ITabTypeOff returns the offset of ITab.Type for a compilation target with a given ptrSize
|
||||
func ITabTypeOff(ptrSize int) int {
	// Type comes right after the single pointer-sized Inter field.
	return ptrSize
}
|
||||
33
src/internal/abi/escape.go
Normal file
33
src/internal/abi/escape.go
Normal file
@@ -0,0 +1,33 @@
|
||||
// Copyright 2024 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package abi
|
||||
|
||||
import "unsafe"
|
||||
|
||||
// NoEscape hides the pointer p from escape analysis, preventing it
|
||||
// from escaping to the heap. It compiles down to nothing.
|
||||
//
|
||||
// WARNING: This is very subtle to use correctly. The caller must
|
||||
// ensure that it's truly safe for p to not escape to the heap by
|
||||
// maintaining runtime pointer invariants (for example, that globals
|
||||
// and the heap may not generally point into a stack).
|
||||
//
|
||||
//go:nosplit
|
||||
//go:nocheckptr
|
||||
func NoEscape(p unsafe.Pointer) unsafe.Pointer {
|
||||
// NOTE(review): the uintptr is deliberately kept in its own local
// before being converted back; folding both conversions into a single
// expression may let escape analysis follow the pointer again. Confirm
// before restructuring.
x := uintptr(p)
|
||||
// x ^ 0 leaves the address unchanged.
return unsafe.Pointer(x ^ 0)
|
||||
}
|
||||
|
||||
var (
	// alwaysFalse is never assigned in this file, so it keeps its zero
	// value; it only guards the store in Escape.
	alwaysFalse bool
	// escapeSink is the package-level destination of that guarded store.
	escapeSink any
)

// Escape forces any pointers in x to escape to the heap and returns x
// unchanged.
func Escape[T any](x T) T {
	if !alwaysFalse {
		return x
	}
	// This store is what makes x escape.
	escapeSink = x
	return x
}
|
||||
14
src/internal/abi/export_test.go
Normal file
14
src/internal/abi/export_test.go
Normal file
@@ -0,0 +1,14 @@
|
||||
// Copyright 2021 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package abi
|
||||
|
||||
// FuncPCTestFn has no Go body; abi_test.s supplies the implementation.
func FuncPCTestFn()
|
||||
|
||||
// FuncPCTestFnAddr is initialised by the DATA directive in abi_test.s.
var FuncPCTestFnAddr uintptr // address of FuncPCTestFn, directly retrieved from assembly
|
||||
|
||||
// FuncPCTest returns FuncPCABI0(FuncPCTestFn) evaluated inside package
// abi itself, i.e. for a function defined in the calling package.
//
//go:noinline
|
||||
func FuncPCTest() uintptr {
|
||||
// FuncPCTestFn is named directly here, as FuncPCABI0 requires.
return FuncPCABI0(FuncPCTestFn)
|
||||
}
|
||||
31
src/internal/abi/funcpc.go
Normal file
31
src/internal/abi/funcpc.go
Normal file
@@ -0,0 +1,31 @@
|
||||
// Copyright 2023 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build !gccgo
|
||||
|
||||
package abi
|
||||
|
||||
// FuncPC* intrinsics.
|
||||
//
|
||||
// CAREFUL: In programs with plugins, FuncPC* can return different values
|
||||
// for the same function (because there are actually multiple copies of
|
||||
// the same function in the address space). To be safe, don't use the
|
||||
// results of this function in any == expression. It is only safe to
|
||||
// use the result as an address at which to start executing code.
|
||||
|
||||
// FuncPCABI0 returns the entry PC of the function f, which must be a
|
||||
// direct reference of a function defined as ABI0. Otherwise it is a
|
||||
// compile-time error.
|
||||
//
|
||||
// Implemented as a compiler intrinsic, which is why the declaration has no body.
|
||||
func FuncPCABI0(f interface{}) uintptr
|
||||
|
||||
// FuncPCABIInternal returns the entry PC of the function f. If f is a
|
||||
// direct reference of a function, it must be defined as ABIInternal.
|
||||
// Otherwise it is a compile-time error. If f is not a direct reference
|
||||
// of a defined function, it assumes that f is a func value. Otherwise
|
||||
// the behavior is undefined.
|
||||
//
|
||||
// Implemented as a compiler intrinsic, which is why the declaration has no body.
|
||||
func FuncPCABIInternal(f interface{}) uintptr
|
||||
21
src/internal/abi/funcpc_gccgo.go
Normal file
21
src/internal/abi/funcpc_gccgo.go
Normal file
@@ -0,0 +1,21 @@
|
||||
// Copyright 2023 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// For bootstrapping with gccgo.
|
||||
|
||||
//go:build gccgo
|
||||
|
||||
package abi
|
||||
|
||||
import "unsafe"
|
||||
|
||||
// FuncPCABI0 views the interface value f as two pointer words and
// returns the uintptr stored at the address held in the second word.
// NOTE(review): for gccgo bootstrapping that uintptr is presumably the
// function's code address; confirm against the gccgo func layout.
func FuncPCABI0(f interface{}) uintptr {
	data := (*[2]unsafe.Pointer)(unsafe.Pointer(&f))[1]
	return *(*uintptr)(data)
}
|
||||
|
||||
// FuncPCABIInternal views the interface value f as two pointer words
// and returns the uintptr stored at the address held in the second word.
// NOTE(review): for gccgo bootstrapping that uintptr is presumably the
// function's code address; confirm against the gccgo func layout.
func FuncPCABIInternal(f interface{}) uintptr {
	type ifaceWords struct {
		first, second unsafe.Pointer
	}
	w := (*ifaceWords)(unsafe.Pointer(&f))
	return *(*uintptr)(w.second)
}
|
||||
27
src/internal/abi/iface.go
Normal file
27
src/internal/abi/iface.go
Normal file
@@ -0,0 +1,27 @@
|
||||
// Copyright 2023 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package abi
|
||||
|
||||
import "unsafe"
|
||||
|
||||
// The first word of every non-empty interface type contains an *ITab.
|
||||
// It records the underlying concrete type (Type), the interface type it
|
||||
// is implementing (Inter), and some ancillary information.
|
||||
//
|
||||
// allocated in non-garbage-collected memory
|
||||
type ITab struct {
|
||||
Inter *InterfaceType
|
||||
Type *Type
|
||||
Hash uint32 // copy of Type.Hash. Used for type switches.
|
||||
Fun [1]uintptr // variable sized. fun[0]==0 means Type does not implement Inter.
|
||||
}
|
||||
|
||||
// EmptyInterface describes the layout of a "interface{}" or a "any."
|
||||
// These are represented differently than non-empty interface, as the first
|
||||
// word always points to an abi.Type.
|
||||
type EmptyInterface struct {
|
||||
Type *Type
|
||||
Data unsafe.Pointer
|
||||
}
|
||||
19
src/internal/abi/map.go
Normal file
19
src/internal/abi/map.go
Normal file
@@ -0,0 +1,19 @@
|
||||
// Copyright 2023 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package abi
|
||||
|
||||
// Map constants shared by several packages.
// runtime/runtime-gdb.py:MapTypePrinter keeps its own copy.
const (
	// MapBucketCountBits is log2 of the number of elements in a bucket.
	MapBucketCountBits = 3
	// MapBucketCount is the maximum number of key/elem pairs a bucket
	// can hold.
	MapBucketCount = 1 << MapBucketCountBits

	// MapMaxKeyBytes and MapMaxElemBytes are the largest key and elem
	// sizes kept inline (instead of mallocing per element). Each must
	// fit in a uint8.
	// Note: fast map functions cannot handle big elems (bigger than
	// MapMaxElemBytes).
	MapMaxKeyBytes  = 128
	MapMaxElemBytes = 128
)
|
||||
18
src/internal/abi/rangefuncconsts.go
Normal file
18
src/internal/abi/rangefuncconsts.go
Normal file
@@ -0,0 +1,18 @@
|
||||
// Copyright 2024 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package abi
|
||||
|
||||
// RF_State is the type of the typed range-function state constants below.
type RF_State int

// The compiler uses these constants for state functions and panic
// indicators; the runtime turns them into more meaningful strings.
// For best code generation, RF_DONE and RF_READY should be 0 and 1.
const (
	// RF_DONE: body of loop has exited in a non-panic way.
	RF_DONE = RF_State(iota)
	// RF_READY: body of loop has not exited yet and is not running.
	// This is not a panic index.
	RF_READY
	// RF_PANIC: body of loop is either currently running, or has panicked.
	RF_PANIC
	// RF_EXHAUSTED: iterator function returned, i.e. the sequence is "exhausted".
	RF_EXHAUSTED
	// RF_MISSING_PANIC: body of loop panicked but the iterator function
	// defer-recovered it away. Unlike the constants above, it is
	// declared as an untyped 4.
	RF_MISSING_PANIC = 4
)
|
||||
8
src/internal/abi/runtime.go
Normal file
8
src/internal/abi/runtime.go
Normal file
@@ -0,0 +1,8 @@
|
||||
// Copyright 2024 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package abi
|
||||
|
||||
// ZeroValSize is the size in bytes of runtime.zeroVal.
|
||||
const ZeroValSize = 1 << 10 // 1024 bytes
|
||||
33
src/internal/abi/stack.go
Normal file
33
src/internal/abi/stack.go
Normal file
@@ -0,0 +1,33 @@
|
||||
// Copyright 2023 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package abi
|
||||
|
||||
const (
	// StackNosplitBase is the base value for the maximum number of bytes
	// a chain of NOSPLIT functions can use.
	//
	// Do not use it directly: it must first be multiplied by the stack
	// guard multiplier. See runtime/stack.go:stackNosplit and
	// cmd/internal/objabi/stack.go:StackNosplit.
	StackNosplitBase = 800

	// There are three different sequences for stack bounds checks,
	// chosen by whether a function's stack frame is small, big, or huge.

	// StackSmall is how many bytes below the stack guard the SP may be
	// after a stack split check.
	//
	// A function whose frame is <= StackSmall can perform its stack
	// check with a single comparison between the stack guard and the SP,
	// because StackSmall bytes of stack space are guaranteed to be
	// available beyond the stack guard.
	StackSmall = 128

	// StackBig bounds the frames for which neither SP-framesize nor
	// stackGuard-StackSmall can underflow, which permits a more
	// efficient check. To guarantee this, StackBig must be <= the size
	// of the unmapped space at zero.
	StackBig = 4096
)
|
||||
7
src/internal/abi/stub.s
Normal file
7
src/internal/abi/stub.s
Normal file
@@ -0,0 +1,7 @@
|
||||
// Copyright 2023 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// This file silences errors about body-less functions
|
||||
// that are provided by intrinsics in the latest version of the compiler,
|
||||
// but may not be known to the bootstrap compiler.
|
||||
61
src/internal/abi/switch.go
Normal file
61
src/internal/abi/switch.go
Normal file
@@ -0,0 +1,61 @@
|
||||
// Copyright 2023 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package abi
|
||||
|
||||
type InterfaceSwitch struct {
|
||||
Cache *InterfaceSwitchCache
|
||||
NCases int
|
||||
|
||||
// Array of NCases elements.
|
||||
// Each case must be a non-empty interface type.
|
||||
Cases [1]*InterfaceType
|
||||
}
|
||||
|
||||
type InterfaceSwitchCache struct {
|
||||
Mask uintptr // mask for index. Must be a power of 2 minus 1
|
||||
Entries [1]InterfaceSwitchCacheEntry // Mask+1 entries total
|
||||
}
|
||||
|
||||
// InterfaceSwitchCacheEntry is one entry of an InterfaceSwitchCache.
type InterfaceSwitchCacheEntry struct {
	Typ  uintptr // type of the source value (a *Type)
	Case int     // number of the case to dispatch to
	Itab uintptr // itab for the resulting case variable (a *runtime.itab)
}
|
||||
|
||||
// go122InterfaceSwitchCache is the master switch consulted by
// UseInterfaceSwitchCache.
const go122InterfaceSwitchCache = true

// UseInterfaceSwitchCache reports whether the interface switch cache is
// used for the architecture named goarch.
//
// The cache needs an atomic load instruction to be safe with multiple
// threads (AtomicLoadPtr must be implemented in
// cmd/compile/internal/ssa/_gen/ARCH.rules), so only the architectures
// listed below qualify.
func UseInterfaceSwitchCache(goarch string) bool {
	if go122InterfaceSwitchCache {
		switch goarch {
		case "amd64", "arm64", "loong64", "mips", "mipsle", "mips64", "mips64le", "ppc64", "ppc64le", "riscv64", "s390x":
			return true
		}
	}
	return false
}
|
||||
|
||||
type TypeAssert struct {
|
||||
Cache *TypeAssertCache
|
||||
Inter *InterfaceType
|
||||
CanFail bool
|
||||
}
|
||||
type TypeAssertCache struct {
|
||||
Mask uintptr
|
||||
Entries [1]TypeAssertCacheEntry
|
||||
}
|
||||
// TypeAssertCacheEntry is one entry of a TypeAssertCache.
type TypeAssertCacheEntry struct {
	Typ uintptr // type of the source value (a *runtime._type)

	// Itab is the itab to use for the result (a *runtime.itab). It is
	// nil if CanFail is set and the conversion would fail.
	Itab uintptr
}
|
||||
111
src/internal/abi/symtab.go
Normal file
111
src/internal/abi/symtab.go
Normal file
@@ -0,0 +1,111 @@
|
||||
// Copyright 2023 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package abi
|
||||
|
||||
// FuncFlag is a set of bits describing a function, passed to the runtime.
type FuncFlag uint8

const (
	// FuncFlagTopFrame marks a function that appears at the top of its
	// stack. The traceback routines stop at such a function and treat
	// that as a successful, complete traversal of the stack.
	// Examples are goexit, which appears at the top of a user goroutine
	// stack, and mstart, which appears at the top of a system goroutine
	// stack.
	FuncFlagTopFrame FuncFlag = 1 << iota

	// FuncFlagSPWrite marks a function that writes an arbitrary value
	// to SP (any write other than adding or subtracting a constant
	// amount). Such changes cannot be encoded into the pcsp tables, so
	// a traceback cannot safely unwind past an SPWrite function.
	// Stopping at one counts as an incomplete unwinding of the stack,
	// which in certain contexts (in particular garbage collector stack
	// scans) is a fatal error.
	FuncFlagSPWrite

	// FuncFlagAsm marks a function that was implemented in assembly.
	FuncFlagAsm
)
|
||||
|
||||
// FuncID names the functions that the runtime has to treat specially.
// Note that in some situations involving plugins there may be several
// copies of a particular special runtime function.
type FuncID uint8

const (
	// When adding a FuncID, you probably also want to add an entry to
	// the map in ../../cmd/internal/objabi/funcid.go

	// FuncIDNormal is the zero value: not a special function.
	FuncIDNormal FuncID = iota
	FuncID_abort
	FuncID_asmcgocall
	FuncID_asyncPreempt
	FuncID_cgocallback
	FuncID_corostart
	FuncID_debugCallV2
	FuncID_gcBgMarkWorker
	FuncID_goexit
	FuncID_gogo
	FuncID_gopanic
	FuncID_handleAsyncEvent
	FuncID_mcall
	FuncID_morestack
	FuncID_mstart
	FuncID_panicwrap
	FuncID_rt0_go
	FuncID_runfinq
	FuncID_runtime_main
	FuncID_sigpanic
	FuncID_systemstack
	FuncID_systemstack_switch
	// FuncIDWrapper covers any autogenerated code (hash/eq algorithms,
	// method wrappers, etc.).
	FuncIDWrapper
)
|
||||
|
||||
// ArgsSizeUnknown is set in Func.argsize to mark all functions
|
||||
// whose argument size is unknown (C vararg functions, and
|
||||
// assembly code without an explicit specification).
|
||||
// This value is generated by the compiler, assembler, or linker.
|
||||
const ArgsSizeUnknown = -(1 << 31) // same value as -0x80000000
|
||||
|
||||
// IDs for the PCDATA tables in Go binaries.
//
// These must agree with ../../../runtime/funcdata.h.
const (
	PCDATA_UnsafePoint   = 0
	PCDATA_StackMapIndex = 1
	PCDATA_InlTreeIndex  = 2
	PCDATA_ArgLiveIndex  = 3
)

// IDs for the FUNCDATA tables in Go binaries.
//
// These must agree with ../../../runtime/funcdata.h.
const (
	FUNCDATA_ArgsPointerMaps    = 0
	FUNCDATA_LocalsPointerMaps  = 1
	FUNCDATA_StackObjects       = 2
	FUNCDATA_InlTree            = 3
	FUNCDATA_OpenCodedDeferInfo = 4
	FUNCDATA_ArgInfo            = 5
	FUNCDATA_ArgLiveInfo        = 6
	FUNCDATA_WrapInfo           = 7
)
|
||||
|
||||
// Special values stored in the PCDATA_UnsafePoint table.
const (
	UnsafePointSafe   = -1 // Safe for async preemption
	UnsafePointUnsafe = -2 // Unsafe for async preemption

	// UnsafePointRestart1 and UnsafePointRestart2 each cover a sequence
	// of instructions: if an async preemption happens inside it, the PC
	// should be backed off to the start of the sequence when resuming.
	// Two values exist so that the start/end of a sequence can still be
	// told apart when two sequences are next to each other.
	UnsafePointRestart1 = -3
	UnsafePointRestart2 = -4

	// UnsafePointRestartAtEntry is like UnsafePointRestart1, but
	// execution goes back to function entry if async preempted.
	UnsafePointRestartAtEntry = -5
)
|
||||
|
||||
const (
	// MINFUNC is the minimum size for a function.
	MINFUNC = 16

	// FuncTabBucketSize is the size of a bucket in the pc->func lookup table.
	FuncTabBucketSize = 256 * MINFUNC
)
|
||||
22
src/internal/abi/testdata/x.go
vendored
Normal file
22
src/internal/abi/testdata/x.go
vendored
Normal file
@@ -0,0 +1,22 @@
|
||||
// Copyright 2021 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package x
|
||||
|
||||
import "internal/abi"
|
||||
|
||||
func Fn0() // defined in assembly (testdata/x.s)
|
||||
|
||||
func Fn1() {} // ordinary Go function with a body
|
||||
|
||||
var FnExpr func() // func-typed variable, i.e. not a direct reference to a function
|
||||
|
||||
func test() { // abi_test.go matches compiler errors by x.go line number: keep every statement on its current line
|
||||
_ = abi.FuncPCABI0(Fn0) // line 16, no error
|
||||
_ = abi.FuncPCABIInternal(Fn0) // line 17, error
|
||||
_ = abi.FuncPCABI0(Fn1) // line 18, error
|
||||
_ = abi.FuncPCABIInternal(Fn1) // line 19, no error
|
||||
_ = abi.FuncPCABI0(FnExpr) // line 20, error
|
||||
_ = abi.FuncPCABIInternal(FnExpr) // line 21, no error
|
||||
}
|
||||
6
src/internal/abi/testdata/x.s
vendored
Normal file
6
src/internal/abi/testdata/x.s
vendored
Normal file
@@ -0,0 +1,6 @@
|
||||
// Copyright 2021 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Fn0 is an empty assembly function (it only returns); testdata/x.go
// declares it without a body.
TEXT ·Fn0(SB), 0, $0-0
|
||||
RET
|
||||
803
src/internal/abi/type.go
Normal file
803
src/internal/abi/type.go
Normal file
@@ -0,0 +1,803 @@
|
||||
// Copyright 2023 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package abi
|
||||
|
||||
import (
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
// Type is the runtime representation of a Go type.
|
||||
//
|
||||
// Be careful about accessing this type at build time, as the version
|
||||
// of this type in the compiler/linker may not have the same layout
|
||||
// as the version in the target binary, due to pointer width
|
||||
// differences and any experiments. Use cmd/compile/internal/rttype
|
||||
// or the functions in compiletype.go to access this type instead.
|
||||
// (TODO: this admonition applies to every type in this package.
|
||||
// Put it in some shared location?)
|
||||
type Type struct {
|
||||
Size_ uintptr
|
||||
PtrBytes uintptr // number of (prefix) bytes in the type that can contain pointers
|
||||
Hash uint32 // hash of type; avoids computation in hash tables
|
||||
TFlag TFlag // extra type information flags
|
||||
Align_ uint8 // alignment of variable with this type
|
||||
FieldAlign_ uint8 // alignment of struct field with this type
|
||||
Kind_ Kind // enumeration for C
|
||||
// function for comparing objects of this type
|
||||
// (ptr to object A, ptr to object B) -> ==?
|
||||
Equal func(unsafe.Pointer, unsafe.Pointer) bool
|
||||
// GCData stores the GC type data for the garbage collector.
|
||||
// If the KindGCProg bit is set in kind, GCData is a GC program.
|
||||
// Otherwise it is a ptrmask bitmap. See mbitmap.go for details.
|
||||
GCData *byte
|
||||
Str NameOff // string form
|
||||
PtrToThis TypeOff // type for pointer to this type, may be zero
|
||||
}
|
||||
|
||||
// A Kind represents the specific kind of type that a Type represents.
|
||||
// The zero Kind is not a valid kind.
|
||||
type Kind uint8
|
||||
|
||||
const (
|
||||
Invalid Kind = iota
|
||||
Bool
|
||||
Int
|
||||
Int8
|
||||
Int16
|
||||
Int32
|
||||
Int64
|
||||
Uint
|
||||
Uint8
|
||||
Uint16
|
||||
Uint32
|
||||
Uint64
|
||||
Uintptr
|
||||
Float32
|
||||
Float64
|
||||
Complex64
|
||||
Complex128
|
||||
Array
|
||||
Chan
|
||||
Func
|
||||
Interface
|
||||
Map
|
||||
Pointer
|
||||
Slice
|
||||
String
|
||||
Struct
|
||||
UnsafePointer
|
||||
)
|
||||
|
||||
// Flag bits stored in Type.Kind_ alongside the Kind value itself.
const (
|
||||
// TODO (khr, drchase) why aren't these in TFlag? Investigate, fix if possible.
|
||||
KindDirectIface Kind = 1 << 5 // type is stored directly in an interface value (see IsDirectIface)
|
||||
KindGCProg Kind = 1 << 6 // Type.GCData points to a GC program
|
||||
KindMask Kind = (1 << 5) - 1 // selects the Kind value proper out of Kind_
|
||||
)
|
||||
|
||||
// TFlag is used by a Type to signal what extra type information is
|
||||
// available in the memory directly following the Type value.
|
||||
type TFlag uint8
|
||||
|
||||
const (
|
||||
// TFlagUncommon means that there is a data with a type, UncommonType,
|
||||
// just beyond the shared-per-type common data. That is, the data
|
||||
// for struct types will store their UncommonType at one offset, the
|
||||
// data for interface types will store their UncommonType at a different
|
||||
// offset. UncommonType is always accessed via a pointer that is computed
|
||||
// using trust-us-we-are-the-implementors pointer arithmetic.
|
||||
//
|
||||
// For example, if t.Kind() == Struct and t.tflag&TFlagUncommon != 0,
|
||||
// then t has UncommonType data and it can be accessed as:
|
||||
//
|
||||
// type structTypeUncommon struct {
|
||||
// structType
|
||||
// u UncommonType
|
||||
// }
|
||||
// u := &(*structTypeUncommon)(unsafe.Pointer(t)).u
|
||||
TFlagUncommon TFlag = 1 << 0
|
||||
|
||||
// TFlagExtraStar means the name in the str field has an
|
||||
// extraneous '*' prefix. This is because for most types T in
|
||||
// a program, the type *T also exists and reusing the str data
|
||||
// saves binary size.
|
||||
TFlagExtraStar TFlag = 1 << 1
|
||||
|
||||
// TFlagNamed means the type has a name.
|
||||
TFlagNamed TFlag = 1 << 2
|
||||
|
||||
// TFlagRegularMemory means that equal and hash functions can treat
|
||||
// this type as a single region of t.size bytes.
|
||||
TFlagRegularMemory TFlag = 1 << 3
|
||||
|
||||
// TFlagUnrolledBitmap marks special types that are unrolled-bitmap
|
||||
// versions of types with GC programs.
|
||||
// These types need to be deallocated when the underlying object
|
||||
// is freed.
|
||||
TFlagUnrolledBitmap TFlag = 1 << 4
|
||||
)
|
||||
|
||||
// NameOff is the offset to a name from moduledata.types. See resolveNameOff in runtime.
|
||||
type NameOff int32
|
||||
|
||||
// TypeOff is the offset to a type from moduledata.types. See resolveTypeOff in runtime.
|
||||
type TypeOff int32
|
||||
|
||||
// TextOff is an offset from the top of a text section. See (rtype).textOff in runtime.
|
||||
type TextOff int32
|
||||
|
||||
// String returns the name of k.
|
||||
func (k Kind) String() string {
|
||||
if int(k) < len(kindNames) {
|
||||
return kindNames[k]
|
||||
}
|
||||
return kindNames[0]
|
||||
}
|
||||
|
||||
var kindNames = []string{
|
||||
Invalid: "invalid",
|
||||
Bool: "bool",
|
||||
Int: "int",
|
||||
Int8: "int8",
|
||||
Int16: "int16",
|
||||
Int32: "int32",
|
||||
Int64: "int64",
|
||||
Uint: "uint",
|
||||
Uint8: "uint8",
|
||||
Uint16: "uint16",
|
||||
Uint32: "uint32",
|
||||
Uint64: "uint64",
|
||||
Uintptr: "uintptr",
|
||||
Float32: "float32",
|
||||
Float64: "float64",
|
||||
Complex64: "complex64",
|
||||
Complex128: "complex128",
|
||||
Array: "array",
|
||||
Chan: "chan",
|
||||
Func: "func",
|
||||
Interface: "interface",
|
||||
Map: "map",
|
||||
Pointer: "ptr",
|
||||
Slice: "slice",
|
||||
String: "string",
|
||||
Struct: "struct",
|
||||
UnsafePointer: "unsafe.Pointer",
|
||||
}
|
||||
|
||||
// TypeOf returns the abi.Type of some value.
|
||||
func TypeOf(a any) *Type {
|
||||
eface := *(*EmptyInterface)(unsafe.Pointer(&a))
|
||||
// Types are either static (for compiler-created types) or
|
||||
// heap-allocated but always reachable (for reflection-created
|
||||
// types, held in the central map). So there is no need to
|
||||
// escape types. noescape here help avoid unnecessary escape
|
||||
// of v.
|
||||
return (*Type)(NoEscape(unsafe.Pointer(eface.Type)))
|
||||
}
|
||||
|
||||
// TypeFor returns the abi.Type for a type parameter.
|
||||
func TypeFor[T any]() *Type {
|
||||
var v T
|
||||
if t := TypeOf(v); t != nil {
|
||||
return t // optimize for T being a non-interface kind
|
||||
}
|
||||
return TypeOf((*T)(nil)).Elem() // only for an interface kind
|
||||
}
|
||||
|
||||
// Kind returns the kind of t, i.e. Kind_ with every bit outside KindMask
// cleared.
func (t *Type) Kind() Kind {
	return t.Kind_ & KindMask
}
|
||||
|
||||
func (t *Type) HasName() bool {
|
||||
return t.TFlag&TFlagNamed != 0
|
||||
}
|
||||
|
||||
// Pointers reports whether t contains pointers.
|
||||
func (t *Type) Pointers() bool { return t.PtrBytes != 0 }
|
||||
|
||||
// IfaceIndir reports whether t is stored indirectly in an interface value.
|
||||
func (t *Type) IfaceIndir() bool {
|
||||
return t.Kind_&KindDirectIface == 0
|
||||
}
|
||||
|
||||
// isDirectIface reports whether t is stored directly in an interface value.
|
||||
func (t *Type) IsDirectIface() bool {
|
||||
return t.Kind_&KindDirectIface != 0
|
||||
}
|
||||
|
||||
func (t *Type) GcSlice(begin, end uintptr) []byte {
|
||||
return unsafe.Slice(t.GCData, int(end))[begin:]
|
||||
}
|
||||
|
||||
// Method on non-interface type
|
||||
type Method struct {
|
||||
Name NameOff // name of method
|
||||
Mtyp TypeOff // method type (without receiver)
|
||||
Ifn TextOff // fn used in interface call (one-word receiver)
|
||||
Tfn TextOff // fn used for normal method call
|
||||
}
|
||||
|
||||
// UncommonType is present only for defined types or types with methods
|
||||
// (if T is a defined type, the uncommonTypes for T and *T have methods).
|
||||
// Using a pointer to this struct reduces the overall size required
|
||||
// to describe a non-defined type with no methods.
|
||||
type UncommonType struct {
|
||||
PkgPath NameOff // import path; empty for built-in types like int, string
|
||||
Mcount uint16 // number of methods
|
||||
Xcount uint16 // number of exported methods
|
||||
Moff uint32 // offset from this uncommontype to [mcount]Method
|
||||
_ uint32 // unused
|
||||
}
|
||||
|
||||
func (t *UncommonType) Methods() []Method {
|
||||
if t.Mcount == 0 {
|
||||
return nil
|
||||
}
|
||||
return (*[1 << 16]Method)(addChecked(unsafe.Pointer(t), uintptr(t.Moff), "t.mcount > 0"))[:t.Mcount:t.Mcount]
|
||||
}
|
||||
|
||||
func (t *UncommonType) ExportedMethods() []Method {
|
||||
if t.Xcount == 0 {
|
||||
return nil
|
||||
}
|
||||
return (*[1 << 16]Method)(addChecked(unsafe.Pointer(t), uintptr(t.Moff), "t.xcount > 0"))[:t.Xcount:t.Xcount]
|
||||
}
|
||||
|
||||
// addChecked returns p+x.
//
// whySafe is deliberately unused, which keeps the function as cheap to
// inline as a plain pointer addition. Every call site should still pass a
// string recording why the addition is safe, meaning why it does not move
// the pointer to the very end of p's allocation, where it would
// incorrectly refer to the next block in memory.
func addChecked(p unsafe.Pointer, x uintptr, whySafe string) unsafe.Pointer {
	return unsafe.Add(p, x)
}
|
||||
|
||||
// Imethod represents a method on an interface type
|
||||
type Imethod struct {
|
||||
Name NameOff // name of method
|
||||
Typ TypeOff // .(*FuncType) underneath
|
||||
}
|
||||
|
||||
// ArrayType represents a fixed array type.
|
||||
type ArrayType struct {
|
||||
Type
|
||||
Elem *Type // array element type
|
||||
Slice *Type // slice type
|
||||
Len uintptr
|
||||
}
|
||||
|
||||
// Len returns the length of t if t is an array type, otherwise 0
|
||||
func (t *Type) Len() int {
|
||||
if t.Kind() == Array {
|
||||
return int((*ArrayType)(unsafe.Pointer(t)).Len)
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (t *Type) Common() *Type {
|
||||
return t
|
||||
}
|
||||
|
||||
// ChanDir is the direction of a channel type, expressed as a combination of
// the RecvDir and SendDir bits.
type ChanDir int
|
||||
|
||||
const (
|
||||
RecvDir ChanDir = 1 << iota // <-chan
|
||||
SendDir // chan<-
|
||||
BothDir = RecvDir | SendDir // chan
|
||||
InvalidDir ChanDir = 0
|
||||
)
|
||||
|
||||
// ChanType represents a channel type
|
||||
type ChanType struct {
|
||||
Type
|
||||
Elem *Type
|
||||
Dir ChanDir
|
||||
}
|
||||
|
||||
// structTypeUncommon overlays a StructType followed directly by its
// UncommonType; Type.Uncommon casts to it to reach that data for struct kinds.
type structTypeUncommon struct {
|
||||
StructType
|
||||
u UncommonType
|
||||
}
|
||||
|
||||
// ChanDir returns the direction of t if t is a channel type, otherwise InvalidDir (0).
|
||||
func (t *Type) ChanDir() ChanDir {
|
||||
if t.Kind() == Chan {
|
||||
ch := (*ChanType)(unsafe.Pointer(t))
|
||||
return ch.Dir
|
||||
}
|
||||
return InvalidDir
|
||||
}
|
||||
|
||||
// Uncommon returns a pointer to T's "uncommon" data if there is any, otherwise nil
|
||||
func (t *Type) Uncommon() *UncommonType {
|
||||
if t.TFlag&TFlagUncommon == 0 {
|
||||
return nil
|
||||
}
|
||||
switch t.Kind() {
|
||||
case Struct:
|
||||
return &(*structTypeUncommon)(unsafe.Pointer(t)).u
|
||||
case Pointer:
|
||||
type u struct {
|
||||
PtrType
|
||||
u UncommonType
|
||||
}
|
||||
return &(*u)(unsafe.Pointer(t)).u
|
||||
case Func:
|
||||
type u struct {
|
||||
FuncType
|
||||
u UncommonType
|
||||
}
|
||||
return &(*u)(unsafe.Pointer(t)).u
|
||||
case Slice:
|
||||
type u struct {
|
||||
SliceType
|
||||
u UncommonType
|
||||
}
|
||||
return &(*u)(unsafe.Pointer(t)).u
|
||||
case Array:
|
||||
type u struct {
|
||||
ArrayType
|
||||
u UncommonType
|
||||
}
|
||||
return &(*u)(unsafe.Pointer(t)).u
|
||||
case Chan:
|
||||
type u struct {
|
||||
ChanType
|
||||
u UncommonType
|
||||
}
|
||||
return &(*u)(unsafe.Pointer(t)).u
|
||||
case Map:
|
||||
type u struct {
|
||||
MapType
|
||||
u UncommonType
|
||||
}
|
||||
return &(*u)(unsafe.Pointer(t)).u
|
||||
case Interface:
|
||||
type u struct {
|
||||
InterfaceType
|
||||
u UncommonType
|
||||
}
|
||||
return &(*u)(unsafe.Pointer(t)).u
|
||||
default:
|
||||
type u struct {
|
||||
Type
|
||||
u UncommonType
|
||||
}
|
||||
return &(*u)(unsafe.Pointer(t)).u
|
||||
}
|
||||
}
|
||||
|
||||
// Elem returns the element type for t if t is an array, channel, map, pointer, or slice, otherwise nil.
|
||||
func (t *Type) Elem() *Type {
|
||||
switch t.Kind() {
|
||||
case Array:
|
||||
tt := (*ArrayType)(unsafe.Pointer(t))
|
||||
return tt.Elem
|
||||
case Chan:
|
||||
tt := (*ChanType)(unsafe.Pointer(t))
|
||||
return tt.Elem
|
||||
case Map:
|
||||
tt := (*MapType)(unsafe.Pointer(t))
|
||||
return tt.Elem
|
||||
case Pointer:
|
||||
tt := (*PtrType)(unsafe.Pointer(t))
|
||||
return tt.Elem
|
||||
case Slice:
|
||||
tt := (*SliceType)(unsafe.Pointer(t))
|
||||
return tt.Elem
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// StructType returns t cast to a *StructType, or nil if its tag does not match.
|
||||
func (t *Type) StructType() *StructType {
|
||||
if t.Kind() != Struct {
|
||||
return nil
|
||||
}
|
||||
return (*StructType)(unsafe.Pointer(t))
|
||||
}
|
||||
|
||||
// MapType returns t cast to a *MapType, or nil if its tag does not match.
|
||||
func (t *Type) MapType() *MapType {
|
||||
if t.Kind() != Map {
|
||||
return nil
|
||||
}
|
||||
return (*MapType)(unsafe.Pointer(t))
|
||||
}
|
||||
|
||||
// ArrayType returns t cast to a *ArrayType, or nil if its tag does not match.
|
||||
func (t *Type) ArrayType() *ArrayType {
|
||||
if t.Kind() != Array {
|
||||
return nil
|
||||
}
|
||||
return (*ArrayType)(unsafe.Pointer(t))
|
||||
}
|
||||
|
||||
// FuncType returns t cast to a *FuncType, or nil if its tag does not match.
|
||||
func (t *Type) FuncType() *FuncType {
|
||||
if t.Kind() != Func {
|
||||
return nil
|
||||
}
|
||||
return (*FuncType)(unsafe.Pointer(t))
|
||||
}
|
||||
|
||||
// InterfaceType returns t cast to a *InterfaceType, or nil if its tag does not match.
|
||||
func (t *Type) InterfaceType() *InterfaceType {
|
||||
if t.Kind() != Interface {
|
||||
return nil
|
||||
}
|
||||
return (*InterfaceType)(unsafe.Pointer(t))
|
||||
}
|
||||
|
||||
// Size returns the size of data with type t.
|
||||
func (t *Type) Size() uintptr { return t.Size_ }
|
||||
|
||||
// Align returns the alignment of data with type t.
|
||||
func (t *Type) Align() int { return int(t.Align_) }
|
||||
|
||||
func (t *Type) FieldAlign() int { return int(t.FieldAlign_) }
|
||||
|
||||
// InterfaceType is the type descriptor for an interface type: the common
// Type header followed by the package path and the interface's methods.
type InterfaceType struct {
|
||||
Type
|
||||
PkgPath Name // import path
|
||||
Methods []Imethod // sorted by hash
|
||||
}
|
||||
|
||||
func (t *Type) ExportedMethods() []Method {
|
||||
ut := t.Uncommon()
|
||||
if ut == nil {
|
||||
return nil
|
||||
}
|
||||
return ut.ExportedMethods()
|
||||
}
|
||||
|
||||
func (t *Type) NumMethod() int {
|
||||
if t.Kind() == Interface {
|
||||
tt := (*InterfaceType)(unsafe.Pointer(t))
|
||||
return tt.NumMethod()
|
||||
}
|
||||
return len(t.ExportedMethods())
|
||||
}
|
||||
|
||||
// NumMethod returns the number of interface methods in the type's method set.
|
||||
func (t *InterfaceType) NumMethod() int { return len(t.Methods) }
|
||||
|
||||
type MapType struct {
|
||||
Type
|
||||
Key *Type
|
||||
Elem *Type
|
||||
Bucket *Type // internal type representing a hash bucket
|
||||
// function for hashing keys (ptr to key, seed) -> hash
|
||||
Hasher func(unsafe.Pointer, uintptr) uintptr
|
||||
KeySize uint8 // size of key slot
|
||||
ValueSize uint8 // size of elem slot
|
||||
BucketSize uint16 // size of bucket
|
||||
Flags uint32
|
||||
}
|
||||
|
||||
// Note: flag values must match those used in the TMAP case
|
||||
// in ../cmd/compile/internal/reflectdata/reflect.go:writeType.
|
||||
func (mt *MapType) IndirectKey() bool { // store ptr to key instead of key itself
|
||||
return mt.Flags&1 != 0
|
||||
}
|
||||
func (mt *MapType) IndirectElem() bool { // store ptr to elem instead of elem itself
|
||||
return mt.Flags&2 != 0
|
||||
}
|
||||
func (mt *MapType) ReflexiveKey() bool { // true if k==k for all keys
|
||||
return mt.Flags&4 != 0
|
||||
}
|
||||
func (mt *MapType) NeedKeyUpdate() bool { // true if we need to update key on an overwrite
|
||||
return mt.Flags&8 != 0
|
||||
}
|
||||
func (mt *MapType) HashMightPanic() bool { // true if hash function might panic
|
||||
return mt.Flags&16 != 0
|
||||
}
|
||||
|
||||
func (t *Type) Key() *Type {
|
||||
if t.Kind() == Map {
|
||||
return (*MapType)(unsafe.Pointer(t)).Key
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// SliceType is the type descriptor for a slice type: the common Type header
// followed by the element type.
type SliceType struct {
|
||||
Type
|
||||
Elem *Type // slice element type
|
||||
}
|
||||
|
||||
// FuncType represents a function type.
|
||||
//
|
||||
// A *Type for each in and out parameter is stored in an array that
|
||||
// directly follows the funcType (and possibly its uncommonType). So
|
||||
// a function type with one method, one input, and one output is:
|
||||
//
|
||||
// struct {
|
||||
// funcType
|
||||
// uncommonType
|
||||
// [2]*rtype // [0] is in, [1] is out
|
||||
// }
|
||||
type FuncType struct {
|
||||
Type
|
||||
InCount uint16
|
||||
OutCount uint16 // top bit is set if last input parameter is ...
|
||||
}
|
||||
|
||||
func (t *FuncType) In(i int) *Type {
|
||||
return t.InSlice()[i]
|
||||
}
|
||||
|
||||
func (t *FuncType) NumIn() int {
|
||||
return int(t.InCount)
|
||||
}
|
||||
|
||||
func (t *FuncType) NumOut() int {
|
||||
return int(t.OutCount & (1<<15 - 1))
|
||||
}
|
||||
|
||||
func (t *FuncType) Out(i int) *Type {
|
||||
return (t.OutSlice()[i])
|
||||
}
|
||||
|
||||
func (t *FuncType) InSlice() []*Type {
|
||||
uadd := unsafe.Sizeof(*t)
|
||||
if t.TFlag&TFlagUncommon != 0 {
|
||||
uadd += unsafe.Sizeof(UncommonType{})
|
||||
}
|
||||
if t.InCount == 0 {
|
||||
return nil
|
||||
}
|
||||
return (*[1 << 16]*Type)(addChecked(unsafe.Pointer(t), uadd, "t.inCount > 0"))[:t.InCount:t.InCount]
|
||||
}
|
||||
func (t *FuncType) OutSlice() []*Type {
|
||||
outCount := uint16(t.NumOut())
|
||||
if outCount == 0 {
|
||||
return nil
|
||||
}
|
||||
uadd := unsafe.Sizeof(*t)
|
||||
if t.TFlag&TFlagUncommon != 0 {
|
||||
uadd += unsafe.Sizeof(UncommonType{})
|
||||
}
|
||||
return (*[1 << 17]*Type)(addChecked(unsafe.Pointer(t), uadd, "outCount > 0"))[t.InCount : t.InCount+outCount : t.InCount+outCount]
|
||||
}
|
||||
|
||||
func (t *FuncType) IsVariadic() bool {
|
||||
return t.OutCount&(1<<15) != 0
|
||||
}
|
||||
|
||||
// PtrType is the type descriptor for a pointer type: the common Type header
// followed by the pointed-at type.
type PtrType struct {
|
||||
Type
|
||||
Elem *Type // pointer element (pointed at) type
|
||||
}
|
||||
|
||||
// StructField describes one field of a struct type: its encoded name, its
// type, and its byte offset.
type StructField struct {
|
||||
Name Name // name is always non-empty
|
||||
Typ *Type // type of field
|
||||
Offset uintptr // byte offset of field
|
||||
}
|
||||
|
||||
func (f *StructField) Embedded() bool {
|
||||
return f.Name.IsEmbedded()
|
||||
}
|
||||
|
||||
// StructType is the type descriptor for a struct type: the common Type
// header followed by the package path and the list of fields.
type StructType struct {
|
||||
Type
|
||||
PkgPath Name
|
||||
Fields []StructField
|
||||
}
|
||||
|
||||
// Name is an encoded type Name with optional extra data.
|
||||
//
|
||||
// The first byte is a bit field containing:
|
||||
//
|
||||
// 1<<0 the name is exported
|
||||
// 1<<1 tag data follows the name
|
||||
// 1<<2 pkgPath nameOff follows the name and tag
|
||||
// 1<<3 the name is of an embedded (a.k.a. anonymous) field
|
||||
//
|
||||
// Following that, there is a varint-encoded length of the name,
|
||||
// followed by the name itself.
|
||||
//
|
||||
// If tag data is present, it also has a varint-encoded length
|
||||
// followed by the tag itself.
|
||||
//
|
||||
// If the import path follows, then 4 bytes at the end of
|
||||
// the data form a nameOff. The import path is only set for concrete
|
||||
// methods that are defined in a different package than their type.
|
||||
//
|
||||
// If a name starts with "*", then the exported bit represents
|
||||
// whether the pointed to type is exported.
|
||||
//
|
||||
// Note: this encoding must match here and in:
|
||||
// cmd/compile/internal/reflectdata/reflect.go
|
||||
// cmd/link/internal/ld/decodesym.go
|
||||
|
||||
type Name struct {
|
||||
Bytes *byte
|
||||
}
|
||||
|
||||
// DataChecked does pointer arithmetic on n's Bytes, and that arithmetic is asserted to
|
||||
// be safe for the reason in whySafe (which can appear in a backtrace, etc.)
|
||||
func (n Name) DataChecked(off int, whySafe string) *byte {
|
||||
return (*byte)(addChecked(unsafe.Pointer(n.Bytes), uintptr(off), whySafe))
|
||||
}
|
||||
|
||||
// Data does pointer arithmetic on n's Bytes, and that arithmetic is asserted to
|
||||
// be safe because the runtime made the call (other packages use DataChecked)
|
||||
func (n Name) Data(off int) *byte {
|
||||
return (*byte)(addChecked(unsafe.Pointer(n.Bytes), uintptr(off), "the runtime doesn't need to give you a reason"))
|
||||
}
|
||||
|
||||
// IsExported returns "is n exported?"
|
||||
func (n Name) IsExported() bool {
|
||||
return (*n.Bytes)&(1<<0) != 0
|
||||
}
|
||||
|
||||
// HasTag returns true iff there is tag data following this name
|
||||
func (n Name) HasTag() bool {
|
||||
return (*n.Bytes)&(1<<1) != 0
|
||||
}
|
||||
|
||||
// IsEmbedded returns true iff n is embedded (an anonymous field).
|
||||
func (n Name) IsEmbedded() bool {
|
||||
return (*n.Bytes)&(1<<3) != 0
|
||||
}
|
||||
|
||||
// ReadVarint parses a varint as encoded by encoding/binary.
|
||||
// It returns the number of encoded bytes and the encoded value.
|
||||
func (n Name) ReadVarint(off int) (int, int) {
|
||||
v := 0
|
||||
for i := 0; ; i++ {
|
||||
x := *n.DataChecked(off+i, "read varint")
|
||||
v += int(x&0x7f) << (7 * i)
|
||||
if x&0x80 == 0 {
|
||||
return i + 1, v
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// IsBlank indicates whether n is "_".
|
||||
func (n Name) IsBlank() bool {
|
||||
if n.Bytes == nil {
|
||||
return false
|
||||
}
|
||||
_, l := n.ReadVarint(1)
|
||||
return l == 1 && *n.Data(2) == '_'
|
||||
}
|
||||
|
||||
// writeVarint encodes n into buf in varint form and returns how many bytes
// it wrote. n must be nonnegative.
// Writes at most 10 bytes.
func writeVarint(buf []byte, n int) int {
	written := 0
	for {
		low := byte(n & 0x7f)
		n >>= 7
		if n != 0 {
			// More groups follow: set the continuation bit.
			low |= 0x80
		}
		buf[written] = low
		written++
		if n == 0 {
			return written
		}
	}
}
|
||||
|
||||
// Name returns the tag string for n, or empty if there is none.
|
||||
func (n Name) Name() string {
|
||||
if n.Bytes == nil {
|
||||
return ""
|
||||
}
|
||||
i, l := n.ReadVarint(1)
|
||||
return unsafe.String(n.DataChecked(1+i, "non-empty string"), l)
|
||||
}
|
||||
|
||||
// Tag returns the tag string for n, or empty if there is none.
|
||||
func (n Name) Tag() string {
|
||||
if !n.HasTag() {
|
||||
return ""
|
||||
}
|
||||
i, l := n.ReadVarint(1)
|
||||
i2, l2 := n.ReadVarint(1 + i + l)
|
||||
return unsafe.String(n.DataChecked(1+i+l+i2, "non-empty string"), l2)
|
||||
}
|
||||
|
||||
func NewName(n, tag string, exported, embedded bool) Name {
|
||||
if len(n) >= 1<<29 {
|
||||
panic("abi.NewName: name too long: " + n[:1024] + "...")
|
||||
}
|
||||
if len(tag) >= 1<<29 {
|
||||
panic("abi.NewName: tag too long: " + tag[:1024] + "...")
|
||||
}
|
||||
var nameLen [10]byte
|
||||
var tagLen [10]byte
|
||||
nameLenLen := writeVarint(nameLen[:], len(n))
|
||||
tagLenLen := writeVarint(tagLen[:], len(tag))
|
||||
|
||||
var bits byte
|
||||
l := 1 + nameLenLen + len(n)
|
||||
if exported {
|
||||
bits |= 1 << 0
|
||||
}
|
||||
if len(tag) > 0 {
|
||||
l += tagLenLen + len(tag)
|
||||
bits |= 1 << 1
|
||||
}
|
||||
if embedded {
|
||||
bits |= 1 << 3
|
||||
}
|
||||
|
||||
b := make([]byte, l)
|
||||
b[0] = bits
|
||||
copy(b[1:], nameLen[:nameLenLen])
|
||||
copy(b[1+nameLenLen:], n)
|
||||
if len(tag) > 0 {
|
||||
tb := b[1+nameLenLen+len(n):]
|
||||
copy(tb, tagLen[:tagLenLen])
|
||||
copy(tb[tagLenLen:], tag)
|
||||
}
|
||||
|
||||
return Name{Bytes: &b[0]}
|
||||
}
|
||||
|
||||
const (
|
||||
TraceArgsLimit = 10 // print no more than 10 args/components
|
||||
TraceArgsMaxDepth = 5 // no more than 5 layers of nesting
|
||||
|
||||
// maxLen is a (conservative) upper bound of the byte stream length. For
|
||||
// each arg/component, it has no more than 2 bytes of data (size, offset),
|
||||
// and no more than one {, }, ... at each level (it cannot have both the
|
||||
// data and ... unless it is the last one, just be conservative). Plus 1
|
||||
// for _endSeq.
|
||||
TraceArgsMaxLen = (TraceArgsMaxDepth*3+2)*TraceArgsLimit + 1
|
||||
)
|
||||
|
||||
// Populate the data.
|
||||
// The data is a stream of bytes, which contains the offsets and sizes of the
|
||||
// non-aggregate arguments or non-aggregate fields/elements of aggregate-typed
|
||||
// arguments, along with special "operators". Specifically,
|
||||
// - for each non-aggregate arg/field/element, its offset from FP (1 byte) and
|
||||
// size (1 byte)
|
||||
// - special operators:
|
||||
// - 0xff - end of sequence
|
||||
// - 0xfe - print { (at the start of an aggregate-typed argument)
|
||||
// - 0xfd - print } (at the end of an aggregate-typed argument)
|
||||
// - 0xfc - print ... (more args/fields/elements)
|
||||
// - 0xfb - print _ (offset too large)
|
||||
const (
|
||||
TraceArgsEndSeq = 0xff
|
||||
TraceArgsStartAgg = 0xfe
|
||||
TraceArgsEndAgg = 0xfd
|
||||
TraceArgsDotdotdot = 0xfc
|
||||
TraceArgsOffsetTooLarge = 0xfb
|
||||
TraceArgsSpecial = 0xf0 // above this are operators, below this are ordinary offsets
|
||||
)
|
||||
|
||||
// MaxPtrmaskBytes is the maximum length of a GC ptrmask bitmap,
|
||||
// which holds 1-bit entries describing where pointers are in a given type.
|
||||
// Above this length, the GC information is recorded as a GC program,
|
||||
// which can express repetition compactly. In either form, the
|
||||
// information is used by the runtime to initialize the heap bitmap,
|
||||
// and for large types (like 128 or more words), they are roughly the
|
||||
// same speed. GC programs are never much larger and often more
|
||||
// compact. (If large arrays are involved, they can be arbitrarily
|
||||
// more compact.)
|
||||
//
|
||||
// The cutoff must be large enough that any allocation large enough to
|
||||
// use a GC program is large enough that it does not share heap bitmap
|
||||
// bytes with any other objects, allowing the GC program execution to
|
||||
// assume an aligned start and not use atomic operations. In the current
|
||||
// runtime, this means all malloc size classes larger than the cutoff must
|
||||
// be multiples of four words. On 32-bit systems that's 16 bytes, and
|
||||
// all size classes >= 16 bytes are 16-byte aligned, so no real constraint.
|
||||
// On 64-bit systems, that's 32 bytes, and 32-byte alignment is guaranteed
|
||||
// for size classes >= 256 bytes. On a 64-bit system, 256 bytes allocated
|
||||
// is 32 pointers, the bits for which fit in 4 bytes. So MaxPtrmaskBytes
|
||||
// must be >= 4.
|
||||
//
|
||||
// We used to use 16 because the GC programs do have some constant overhead
|
||||
// to get started, and processing 128 pointers seems to be enough to
|
||||
// amortize that overhead well.
|
||||
//
|
||||
// To make sure that the runtime's chansend can call typeBitsBulkBarrier,
|
||||
// we raised the limit to 2048, so that even 32-bit systems are guaranteed to
|
||||
// use bitmaps for objects up to 64 kB in size.
|
||||
const MaxPtrmaskBytes = 2048
|
||||
19
src/internal/asan/asan.go
Normal file
19
src/internal/asan/asan.go
Normal file
@@ -0,0 +1,19 @@
|
||||
// Copyright 2024 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build asan
|
||||
|
||||
package asan
|
||||
|
||||
import (
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
const Enabled = true
|
||||
|
||||
// Read is declared without a body here; the go:linkname directive binds it
// to runtime.asanread.
//
//go:linkname Read runtime.asanread
|
||||
func Read(addr unsafe.Pointer, len uintptr)
|
||||
|
||||
// Write is declared without a body here; the go:linkname directive binds it
// to runtime.asanwrite.
//
//go:linkname Write runtime.asanwrite
|
||||
func Write(addr unsafe.Pointer, len uintptr)
|
||||
10
src/internal/asan/doc.go
Normal file
10
src/internal/asan/doc.go
Normal file
@@ -0,0 +1,10 @@
|
||||
// Copyright 2024 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package asan contains helper functions for manually instrumenting
|
||||
// code for the address sanitizer.
|
||||
// The runtime package intentionally exports these functions only in the
|
||||
// asan build; this package exports them unconditionally but without the
|
||||
// "asan" build tag they are no-ops.
|
||||
package asan
|
||||
17
src/internal/asan/noasan.go
Normal file
17
src/internal/asan/noasan.go
Normal file
@@ -0,0 +1,17 @@
|
||||
// Copyright 2024 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build !asan
|
||||
|
||||
package asan
|
||||
|
||||
import (
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
const Enabled = false
|
||||
|
||||
// Read does nothing: this file is compiled only when the asan build tag is
// absent.
func Read(addr unsafe.Pointer, len uintptr) {}
|
||||
|
||||
// Write does nothing: this file is compiled only when the asan build tag is
// absent.
func Write(addr unsafe.Pointer, len uintptr) {}
|
||||
778
src/internal/bisect/bisect.go
Normal file
778
src/internal/bisect/bisect.go
Normal file
@@ -0,0 +1,778 @@
|
||||
// Copyright 2023 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package bisect can be used by compilers and other programs
|
||||
// to serve as a target for the bisect debugging tool.
|
||||
// See [golang.org/x/tools/cmd/bisect] for details about using the tool.
|
||||
//
|
||||
// To be a bisect target, allowing bisect to help determine which of a set of independent
|
||||
// changes provokes a failure, a program needs to:
|
||||
//
|
||||
// 1. Define a way to accept a change pattern on its command line or in its environment.
|
||||
// The most common mechanism is a command-line flag.
|
||||
// The pattern can be passed to [New] to create a [Matcher], the compiled form of a pattern.
|
||||
//
|
||||
// 2. Assign each change a unique ID. One possibility is to use a sequence number,
|
||||
// but the most common mechanism is to hash some kind of identifying information
|
||||
// like the file and line number where the change might be applied.
|
||||
// [Hash] hashes its arguments to compute an ID.
|
||||
//
|
||||
// 3. Enable each change that the pattern says should be enabled.
|
||||
// The [Matcher.ShouldEnable] method answers this question for a given change ID.
|
||||
//
|
||||
// 4. Print a report identifying each change that the pattern says should be printed.
|
||||
// The [Matcher.ShouldPrint] method answers this question for a given change ID.
|
||||
// The report consists of one or more lines on standard error or standard output
|
||||
// that contain a “match marker”. [Marker] returns the match marker for a given ID.
|
||||
// When bisect reports a change as causing the failure, it identifies the change
|
||||
// by printing the report lines with the match marker removed.
|
||||
//
|
||||
// # Example Usage
|
||||
//
|
||||
// A program starts by defining how it receives the pattern. In this example, we will assume a flag.
|
||||
// The next step is to compile the pattern:
|
||||
//
|
||||
// m, err := bisect.New(patternFlag)
|
||||
// if err != nil {
|
||||
// log.Fatal(err)
|
||||
// }
|
||||
//
|
||||
// Then, each time a potential change is considered, the program computes
|
||||
// a change ID by hashing identifying information (source file and line, in this case)
|
||||
// and then calls m.ShouldPrint and m.ShouldEnable to decide whether to
|
||||
// print and enable the change, respectively. The two can return different values
|
||||
// depending on whether bisect is trying to find a minimal set of changes to
|
||||
// disable or to enable to provoke the failure.
|
||||
//
|
||||
// It is usually helpful to write a helper function that accepts the identifying information
|
||||
// and then takes care of hashing, printing, and reporting whether the identified change
|
||||
// should be enabled. For example, a helper for changes identified by a file and line number
|
||||
// would be:
|
||||
//
|
||||
// func ShouldEnable(file string, line int) bool {
|
||||
// h := bisect.Hash(file, line)
|
||||
// if m.ShouldPrint(h) {
|
||||
// fmt.Fprintf(os.Stderr, "%v %s:%d\n", bisect.Marker(h), file, line)
|
||||
// }
|
||||
// return m.ShouldEnable(h)
|
||||
// }
|
||||
//
|
||||
// Finally, note that New returns a nil Matcher when there is no pattern,
|
||||
// meaning that the target is not running under bisect at all,
|
||||
// so all changes should be enabled and none should be printed.
|
||||
// In that common case, the computation of the hash can be avoided entirely
|
||||
// by checking for m == nil first:
|
||||
//
|
||||
// func ShouldEnable(file string, line int) bool {
|
||||
// if m == nil {
|
||||
// return true
|
||||
// }
|
||||
// h := bisect.Hash(file, line)
|
||||
// if m.ShouldPrint(h) {
|
||||
// fmt.Fprintf(os.Stderr, "%v %s:%d\n", bisect.Marker(h), file, line)
|
||||
// }
|
||||
// return m.ShouldEnable(h)
|
||||
// }
|
||||
//
|
||||
// When the identifying information is expensive to format, this code can call
|
||||
// [Matcher.MarkerOnly] to find out whether short report lines containing only the
|
||||
// marker are permitted for a given run. (Bisect permits such lines when it is
|
||||
// still exploring the space of possible changes and will not be showing the
|
||||
// output to the user.) If so, the client can choose to print only the marker:
|
||||
//
|
||||
// func ShouldEnable(file string, line int) bool {
|
||||
// if m == nil {
|
||||
// return true
|
||||
// }
|
||||
// h := bisect.Hash(file, line)
|
||||
// if m.ShouldPrint(h) {
|
||||
// if m.MarkerOnly() {
|
||||
// bisect.PrintMarker(os.Stderr, h)
|
||||
// } else {
|
||||
// fmt.Fprintf(os.Stderr, "%v %s:%d\n", bisect.Marker(h), file, line)
|
||||
// }
|
||||
// }
|
||||
// return m.ShouldEnable(h)
|
||||
// }
|
||||
//
|
||||
// This specific helper – deciding whether to enable a change identified by
|
||||
// file and line number and printing about the change when necessary – is
|
||||
// provided by the [Matcher.FileLine] method.
|
||||
//
|
||||
// Another common usage is deciding whether to make a change in a function
|
||||
// based on the caller's stack, to identify the specific calling contexts that the
|
||||
// change breaks. The [Matcher.Stack] method takes care of obtaining the stack,
|
||||
// printing it when necessary, and reporting whether to enable the change
|
||||
// based on that stack.
|
||||
//
|
||||
// # Pattern Syntax
|
||||
//
|
||||
// Patterns are generated by the bisect tool and interpreted by [New].
|
||||
// Users should not have to understand the patterns except when
|
||||
// debugging a target's bisect support or debugging the bisect tool itself.
|
||||
//
|
||||
// The pattern syntax selecting a change is a sequence of bit strings
|
||||
// separated by + and - operators. Each bit string denotes the set of
|
||||
// changes with IDs ending in those bits, + is set addition, - is set subtraction,
|
||||
// and the expression is evaluated in the usual left-to-right order.
|
||||
// The special binary number “y” denotes the set of all changes,
|
||||
// standing in for the empty bit string.
|
||||
// In the expression, all the + operators must appear before all the - operators.
|
||||
// A leading + adds to an empty set. A leading - subtracts from the set of all
|
||||
// possible suffixes.
|
||||
//
|
||||
// For example:
|
||||
//
|
||||
// - “01+10” and “+01+10” both denote the set of changes
|
||||
// with IDs ending with the bits 01 or 10.
|
||||
//
|
||||
// - “01+10-1001” denotes the set of changes with IDs
|
||||
// ending with the bits 01 or 10, but excluding those ending in 1001.
|
||||
//
|
||||
// - “-01-1000” and “y-01-1000” both denote the set of all changes
|
||||
// with IDs not ending in 01 nor 1000.
|
||||
//
|
||||
// - “0+1-01+001” is not a valid pattern, because all the + operators do not
|
||||
// appear before all the - operators.
|
||||
//
|
||||
// In the syntaxes described so far, the pattern specifies the changes to
|
||||
// enable and report. If a pattern is prefixed by a “!”, the meaning
|
||||
// changes: the pattern specifies the changes to DISABLE and report. This
|
||||
// mode of operation is needed when a program passes with all changes
|
||||
// enabled but fails with no changes enabled. In this case, bisect
|
||||
// searches for minimal sets of changes to disable.
|
||||
// Put another way, the leading “!” inverts the result from [Matcher.ShouldEnable]
|
||||
// but does not invert the result from [Matcher.ShouldPrint].
|
||||
//
|
||||
// As a convenience for manual debugging, “n” is an alias for “!y”,
|
||||
// meaning to disable and report all changes.
|
||||
//
|
||||
// Finally, a leading “v” in the pattern indicates that the reports will be shown
|
||||
// to the user of bisect to describe the changes involved in a failure.
|
||||
// At the API level, the leading “v” causes [Matcher.MarkerOnly] to return false.
|
||||
// See the next section for details.
|
||||
//
|
||||
// # Match Reports
|
||||
//
|
||||
// The target program must enable only those changes matched
|
||||
// by the pattern, and it must print a match report for each such change.
|
||||
// A match report consists of one or more lines of text that will be
|
||||
// printed by the bisect tool to describe a change implicated in causing
|
||||
// a failure. Each line in the report for a given change must contain a
|
||||
// match marker with that change ID, as returned by [Marker].
|
||||
// The markers are elided when displaying the lines to the user.
|
||||
//
|
||||
// A match marker has the form “[bisect-match 0x1234]” where
|
||||
// 0x1234 is the change ID in hexadecimal.
|
||||
// An alternate form is “[bisect-match 010101]”, giving the change ID in binary.
|
||||
//
|
||||
// When [Matcher.MarkerOnly] returns true, the match reports are only
|
||||
// being processed by bisect to learn the set of enabled changes,
|
||||
// not shown to the user, meaning that each report can be a match
|
||||
// marker on a line by itself, eliding the usual textual description.
|
||||
// When the textual description is expensive to compute,
|
||||
// checking [Matcher.MarkerOnly] can help avoid that expense
|
||||
// in most runs.
|
||||
package bisect
|
||||
|
||||
import (
|
||||
"runtime"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
)
|
||||
|
||||
// New creates and returns a new Matcher implementing the given pattern.
// The pattern syntax is defined in the package doc comment.
//
// In addition to the pattern syntax, New("") returns nil, nil.
// The nil *Matcher is valid for use: it returns true from ShouldEnable
// and false from ShouldPrint for all changes. Callers can avoid calling
// [Hash], [Matcher.ShouldEnable], and [Matcher.ShouldPrint] entirely
// when they recognize the nil Matcher.
//
// A malformed pattern yields a nil Matcher and a non-nil error.
func New(pattern string) (*Matcher, error) {
	if pattern == "" {
		return nil, nil
	}

	m := new(Matcher)

	// p is the not-yet-consumed remainder; pattern is kept intact for error messages.
	p := pattern
	// Special case for leading 'q' so that 'qn' quietly disables, e.g. fmahash=qn to disable fma
	// Any instance of 'v' disables 'q'.
	if len(p) > 0 && p[0] == 'q' {
		m.quiet = true
		p = p[1:]
		if p == "" {
			return nil, &parseError{"invalid pattern syntax: " + pattern}
		}
	}
	// Allow multiple v, so that “bisect cmd vPATTERN” can force verbose all the time.
	for len(p) > 0 && p[0] == 'v' {
		m.verbose = true
		m.quiet = false
		p = p[1:]
		if p == "" {
			return nil, &parseError{"invalid pattern syntax: " + pattern}
		}
	}

	// Allow multiple !, each negating the last, so that “bisect cmd !PATTERN” works
	// even when bisect chooses to add its own !.
	m.enable = true
	for len(p) > 0 && p[0] == '!' {
		m.enable = !m.enable
		p = p[1:]
		if p == "" {
			return nil, &parseError{"invalid pattern syntax: " + pattern}
		}
	}

	if p == "n" {
		// n is an alias for !y.
		m.enable = !m.enable
		p = "y"
	}

	// Parse actual pattern syntax.
	//
	// result is the value recorded for the bit string currently being read:
	// true for the first one and after a +, false after a -.
	// bits accumulates its value, start is the index of its first digit,
	// and wid is the number of bits contributed by each digit.
	result := true
	bits := uint64(0)
	start := 0
	wid := 1 // 1-bit (binary); sometimes 4-bit (hex)
	for i := 0; i <= len(p); i++ {
		// Imagine a trailing - at the end of the pattern to flush final suffix
		c := byte('-')
		if i < len(p) {
			c = p[i]
		}
		if i == start && wid == 1 && c == 'x' { // leading x for hex
			start = i + 1
			wid = 4
			continue
		}
		switch c {
		default:
			return nil, &parseError{"invalid pattern syntax: " + pattern}
		case '2', '3', '4', '5', '6', '7', '8', '9':
			// Digits above 1 are only meaningful in a hex bit string.
			if wid != 4 {
				return nil, &parseError{"invalid pattern syntax: " + pattern}
			}
			fallthrough
		case '0', '1':
			bits <<= wid
			bits |= uint64(c - '0')
		case 'a', 'b', 'c', 'd', 'e', 'f', 'A', 'B', 'C', 'D', 'E', 'F':
			if wid != 4 {
				return nil, &parseError{"invalid pattern syntax: " + pattern}
			}
			bits <<= 4
			// Clearing bit 0x20 folds a-f onto A-F before computing the digit value.
			bits |= uint64(c&^0x20 - 'A' + 10)
		case 'y':
			// y may not be immediately followed by a binary digit.
			if i+1 < len(p) && (p[i+1] == '0' || p[i+1] == '1') {
				return nil, &parseError{"invalid pattern syntax: " + pattern}
			}
			bits = 0
		case '+', '-':
			if c == '+' && result == false {
				// Have already seen a -. Should be - from here on.
				return nil, &parseError{"invalid pattern syntax (+ after -): " + pattern}
			}
			if i > 0 {
				// Flush the bit string that ends just before this operator.
				// n is its width in bits.
				n := (i - start) * wid
				if n > 64 {
					return nil, &parseError{"pattern bits too long: " + pattern}
				}
				if n <= 0 {
					// Empty bit string: two operators in a row, or an operator at the very end.
					return nil, &parseError{"invalid pattern syntax: " + pattern}
				}
				if p[start] == 'y' {
					// y stands for the empty bit string: a zero mask matches every ID.
					n = 0
				}
				// For n == 64 the shift yields 0 and the subtraction wraps to all ones.
				mask := uint64(1)<<n - 1
				m.list = append(m.list, cond{mask, bits, result})
			} else if c == '-' {
				// leading - subtracts from complete set
				m.list = append(m.list, cond{0, 0, true})
			}
			// Reset the per-bit-string state for whatever follows the operator.
			bits = 0
			result = c == '+'
			start = i + 1
			wid = 1
		}
	}
	return m, nil
}
|
||||
|
||||
// A Matcher is the parsed, compiled form of a PATTERN string.
// The nil *Matcher is valid: it has all changes enabled but none reported.
type Matcher struct {
	verbose bool   // annotate reporting with human-helpful information
	quiet   bool   // disables all reporting. reset if verbose is true. use case is -d=fmahash=qn
	enable  bool   // when true, list is for “enable and report” (when false, “disable and report”)
	list    []cond // conditions; later ones win over earlier ones

	// dedup holds the state used to suppress repeated stack reports.
	// It starts out nil and is installed on first use by the stack method.
	dedup atomic.Pointer[dedup]
}
|
||||
|
||||
// A cond is a single condition in the matcher.
// Given an input id, if id&mask == bits, return the result.
type cond struct {
	mask   uint64 // selects the low-order ID bits that are compared; 0 matches every ID
	bits   uint64 // required value of the selected bits
	result bool   // match result reported for IDs satisfying the condition
}
|
||||
|
||||
// MarkerOnly reports whether it is okay to print only the marker for
|
||||
// a given change, omitting the identifying information.
|
||||
// MarkerOnly returns true when bisect is using the printed reports
|
||||
// only for an intermediate search step, not for showing to users.
|
||||
func (m *Matcher) MarkerOnly() bool {
|
||||
return !m.verbose
|
||||
}
|
||||
|
||||
// ShouldEnable reports whether the change with the given id should be enabled.
|
||||
func (m *Matcher) ShouldEnable(id uint64) bool {
|
||||
if m == nil {
|
||||
return true
|
||||
}
|
||||
return m.matchResult(id) == m.enable
|
||||
}
|
||||
|
||||
// ShouldPrint reports whether to print identifying information about the change with the given id.
|
||||
func (m *Matcher) ShouldPrint(id uint64) bool {
|
||||
if m == nil || m.quiet {
|
||||
return false
|
||||
}
|
||||
return m.matchResult(id)
|
||||
}
|
||||
|
||||
// matchResult returns the result from the first condition that matches id.
|
||||
func (m *Matcher) matchResult(id uint64) bool {
|
||||
for i := len(m.list) - 1; i >= 0; i-- {
|
||||
c := &m.list[i]
|
||||
if id&c.mask == c.bits {
|
||||
return c.result
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// FileLine reports whether the change identified by file and line should be enabled.
|
||||
// If the change should be printed, FileLine prints a one-line report to w.
|
||||
func (m *Matcher) FileLine(w Writer, file string, line int) bool {
|
||||
if m == nil {
|
||||
return true
|
||||
}
|
||||
return m.fileLine(w, file, line)
|
||||
}
|
||||
|
||||
// fileLine does the real work for FileLine.
|
||||
// This lets FileLine's body handle m == nil and potentially be inlined.
|
||||
func (m *Matcher) fileLine(w Writer, file string, line int) bool {
|
||||
h := Hash(file, line)
|
||||
if m.ShouldPrint(h) {
|
||||
if m.MarkerOnly() {
|
||||
PrintMarker(w, h)
|
||||
} else {
|
||||
printFileLine(w, h, file, line)
|
||||
}
|
||||
}
|
||||
return m.ShouldEnable(h)
|
||||
}
|
||||
|
||||
// printFileLine prints a non-marker-only report for file:line to w.
|
||||
func printFileLine(w Writer, h uint64, file string, line int) error {
|
||||
const markerLen = 40 // overestimate
|
||||
b := make([]byte, 0, markerLen+len(file)+24)
|
||||
b = AppendMarker(b, h)
|
||||
b = appendFileLine(b, file, line)
|
||||
b = append(b, '\n')
|
||||
_, err := w.Write(b)
|
||||
return err
|
||||
}
|
||||
|
||||
// appendFileLine appends "file:line" to dst and returns the extended slice.
// The line number is written in decimal, with a leading '-' when negative.
func appendFileLine(dst []byte, file string, line int) []byte {
	dst = append(append(dst, file...), ':')

	// Format the magnitude as an unsigned value; negating the converted
	// uint also gives the right magnitude for the most negative int.
	mag := uint(line)
	if line < 0 {
		mag = -mag
		dst = append(dst, '-')
	}

	// Produce digits from least to most significant, filling the scratch
	// array from the back. At least one digit is always written, so a
	// zero line number prints as "0".
	var digits [24]byte
	pos := len(digits)
	for {
		pos--
		digits[pos] = byte('0' + mag%10)
		mag /= 10
		if mag == 0 {
			break
		}
	}
	return append(dst, digits[pos:]...)
}
|
||||
|
||||
// MatchStack assigns the current call stack a change ID.
|
||||
// If the stack should be printed, MatchStack prints it.
|
||||
// Then MatchStack reports whether a change at the current call stack should be enabled.
|
||||
func (m *Matcher) Stack(w Writer) bool {
|
||||
if m == nil {
|
||||
return true
|
||||
}
|
||||
return m.stack(w)
|
||||
}
|
||||
|
||||
// stack does the real work for Stack.
// This lets Stack's body handle m == nil and potentially be inlined.
//
// It hashes up to maxStack program counters of the calling stack into a
// change ID, prints a report for that ID to w when the pattern asks for one
// and the stack has not been reported before, and returns whether the
// change should be enabled. Errors from writing to w are discarded.
func (m *Matcher) stack(w Writer) bool {
	const maxStack = 16
	var stk [maxStack]uintptr
	n := runtime.Callers(2, stk[:])
	// caller #2 is not for printing; need it to normalize PCs if ASLR.
	if n <= 1 {
		// Nothing beyond the base frame was captured: report the change as disabled.
		return false
	}

	base := stk[0]
	// normalize PCs
	// (every entry becomes an offset from stk[0], which itself becomes 0)
	for i := range stk[:n] {
		stk[i] -= base
	}

	h := Hash(stk[:n])
	if m.ShouldPrint(h) {
		// Fetch the shared deduplicator, installing a fresh one if none exists yet.
		// If the CompareAndSwap fails, another value was stored in the meantime;
		// loop to pick it up.
		var d *dedup
		for {
			d = m.dedup.Load()
			if d != nil {
				break
			}
			d = new(dedup)
			if m.dedup.CompareAndSwap(nil, d) {
				break
			}
		}

		if m.MarkerOnly() {
			// Marker-only reports use the lock-free, lossy duplicate check.
			if !d.seenLossy(h) {
				PrintMarker(w, h)
			}
		} else {
			// Full reports use the precise duplicate check.
			if !d.seen(h) {
				// Restore PCs in stack for printing
				for i := range stk[:n] {
					stk[i] += base
				}
				// stk[0] served only as the normalization base and is not printed.
				printStack(w, h, stk[1:n])
			}
		}
	}
	return m.ShouldEnable(h)
}
|
||||
|
||||
// Writer is the same interface as io.Writer.
// It is duplicated here to avoid importing io.
type Writer interface {
	// Write writes the given bytes, returning the number of bytes written
	// and any error, as io.Writer's Write does.
	Write([]byte) (int, error)
}
|
||||
|
||||
// PrintMarker prints to w a one-line report containing only the marker for h.
|
||||
// It is appropriate to use when [Matcher.ShouldPrint] and [Matcher.MarkerOnly] both return true.
|
||||
func PrintMarker(w Writer, h uint64) error {
|
||||
var buf [50]byte
|
||||
b := AppendMarker(buf[:0], h)
|
||||
b = append(b, '\n')
|
||||
_, err := w.Write(b)
|
||||
return err
|
||||
}
|
||||
|
||||
// printStack prints to w a multi-line report containing a formatting of the call stack stk,
|
||||
// with each line preceded by the marker for h.
|
||||
func printStack(w Writer, h uint64, stk []uintptr) error {
|
||||
buf := make([]byte, 0, 2048)
|
||||
|
||||
var prefixBuf [100]byte
|
||||
prefix := AppendMarker(prefixBuf[:0], h)
|
||||
|
||||
frames := runtime.CallersFrames(stk)
|
||||
for {
|
||||
f, more := frames.Next()
|
||||
buf = append(buf, prefix...)
|
||||
buf = append(buf, f.Function...)
|
||||
buf = append(buf, "()\n"...)
|
||||
buf = append(buf, prefix...)
|
||||
buf = append(buf, '\t')
|
||||
buf = appendFileLine(buf, f.File, f.Line)
|
||||
buf = append(buf, '\n')
|
||||
if !more {
|
||||
break
|
||||
}
|
||||
}
|
||||
buf = append(buf, prefix...)
|
||||
buf = append(buf, '\n')
|
||||
_, err := w.Write(buf)
|
||||
return err
|
||||
}
|
||||
|
||||
// Marker returns the match marker text to use on any line reporting details
|
||||
// about a match of the given ID.
|
||||
// It always returns the hexadecimal format.
|
||||
func Marker(id uint64) string {
|
||||
return string(AppendMarker(nil, id))
|
||||
}
|
||||
|
||||
// AppendMarker is like [Marker] but appends the marker to dst.
// The marker is "[bisect-match 0x", the ID as exactly 16 lower-case
// hexadecimal digits, and a closing "]".
func AppendMarker(dst []byte, id uint64) []byte {
	const (
		open      = "[bisect-match 0x"
		hexDigits = "0123456789abcdef"
	)
	var marker [len(open) + 16 + 1]byte
	n := copy(marker[:], open)
	// Emit the nibbles from most to least significant.
	for shift := 60; shift >= 0; shift -= 4 {
		marker[n] = hexDigits[(id>>shift)&0xf]
		n++
	}
	marker[n] = ']'
	return append(dst, marker[:]...)
}
|
||||
|
||||
// CutMarker finds the first match marker in line and removes it,
// returning the shortened line (with the marker removed),
// the ID from the match marker,
// and whether a marker was found at all.
// If there is no marker, CutMarker returns line, 0, false.
//
// The ID inside the marker may be written in hexadecimal ("0x" followed by
// 1 to 16 hex digits) or in binary (1 to 64 digits, each 0 or 1).
// A marker whose ID is too long or contains any other character is
// treated as no marker at all.
func CutMarker(line string) (short string, id uint64, ok bool) {
	// Find first instance of prefix.
	prefix := "[bisect-match "
	i := 0
	for ; ; i++ {
		// Stop once too little of line remains for prefix plus at least one more byte.
		if i >= len(line)-len(prefix) {
			return line, 0, false
		}
		if line[i] == '[' && line[i:i+len(prefix)] == prefix {
			break
		}
	}

	// Scan to ].
	j := i + len(prefix)
	for j < len(line) && line[j] != ']' {
		j++
	}
	if j >= len(line) {
		return line, 0, false
	}

	// Parse id.
	idstr := line[i+len(prefix) : j]
	if len(idstr) >= 3 && idstr[:2] == "0x" {
		// parse hex
		if len(idstr) > 2+16 { // max 0x + 16 digits
			return line, 0, false
		}
		for k := 2; k < len(idstr); k++ {
			id <<= 4
			switch c := idstr[k]; {
			case '0' <= c && c <= '9':
				id |= uint64(c - '0')
			case 'a' <= c && c <= 'f':
				id |= uint64(c - 'a' + 10)
			case 'A' <= c && c <= 'F':
				id |= uint64(c - 'A' + 10)
			default:
				// Not a hex digit: reject the marker, as the binary form does,
				// rather than silently treating the character as zero.
				return line, 0, false
			}
		}
	} else {
		if idstr == "" || len(idstr) > 64 { // min 1 digit, max 64 digits
			return line, 0, false
		}
		// parse binary
		for k := 0; k < len(idstr); k++ {
			id <<= 1
			switch c := idstr[k]; c {
			default:
				return line, 0, false
			case '0', '1':
				id |= uint64(c - '0')
			}
		}
	}

	// Construct shortened line.
	// Remove at most one space from around the marker,
	// so that "foo [marker] bar" shortens to "foo bar".
	j++ // skip ]
	if i > 0 && line[i-1] == ' ' {
		i--
	} else if j < len(line) && line[j] == ' ' {
		j++
	}
	short = line[:i] + line[j:]
	return short, id, true
}
|
||||
|
||||
// Hash computes a hash of the data arguments,
|
||||
// each of which must be of type string, byte, int, uint, int32, uint32, int64, uint64, uintptr, or a slice of one of those types.
|
||||
func Hash(data ...any) uint64 {
|
||||
h := offset64
|
||||
for _, v := range data {
|
||||
switch v := v.(type) {
|
||||
default:
|
||||
// Note: Not printing the type, because reflect.ValueOf(v)
|
||||
// would make the interfaces prepared by the caller escape
|
||||
// and therefore allocate. This way, Hash(file, line) runs
|
||||
// without any allocation. It should be clear from the
|
||||
// source code calling Hash what the bad argument was.
|
||||
panic("bisect.Hash: unexpected argument type")
|
||||
case string:
|
||||
h = fnvString(h, v)
|
||||
case byte:
|
||||
h = fnv(h, v)
|
||||
case int:
|
||||
h = fnvUint64(h, uint64(v))
|
||||
case uint:
|
||||
h = fnvUint64(h, uint64(v))
|
||||
case int32:
|
||||
h = fnvUint32(h, uint32(v))
|
||||
case uint32:
|
||||
h = fnvUint32(h, v)
|
||||
case int64:
|
||||
h = fnvUint64(h, uint64(v))
|
||||
case uint64:
|
||||
h = fnvUint64(h, v)
|
||||
case uintptr:
|
||||
h = fnvUint64(h, uint64(v))
|
||||
case []string:
|
||||
for _, x := range v {
|
||||
h = fnvString(h, x)
|
||||
}
|
||||
case []byte:
|
||||
for _, x := range v {
|
||||
h = fnv(h, x)
|
||||
}
|
||||
case []int:
|
||||
for _, x := range v {
|
||||
h = fnvUint64(h, uint64(x))
|
||||
}
|
||||
case []uint:
|
||||
for _, x := range v {
|
||||
h = fnvUint64(h, uint64(x))
|
||||
}
|
||||
case []int32:
|
||||
for _, x := range v {
|
||||
h = fnvUint32(h, uint32(x))
|
||||
}
|
||||
case []uint32:
|
||||
for _, x := range v {
|
||||
h = fnvUint32(h, x)
|
||||
}
|
||||
case []int64:
|
||||
for _, x := range v {
|
||||
h = fnvUint64(h, uint64(x))
|
||||
}
|
||||
case []uint64:
|
||||
for _, x := range v {
|
||||
h = fnvUint64(h, x)
|
||||
}
|
||||
case []uintptr:
|
||||
for _, x := range v {
|
||||
h = fnvUint64(h, uint64(x))
|
||||
}
|
||||
}
|
||||
}
|
||||
return h
|
||||
}
|
||||
|
||||
// Trivial error implementation, here to avoid importing errors.
|
||||
|
||||
// parseError is a trivial error implementation,
// defined here to avoid importing errors.
// New returns it for patterns it cannot parse.
type parseError struct{ text string }

// Error returns the error text.
func (e *parseError) Error() string { return e.text }
|
||||
|
||||
// FNV-1a implementation. See Go's hash/fnv/fnv.go.
// Copied here for simplicity (can handle integers more directly)
// and to avoid importing hash/fnv.

const (
	offset64 uint64 = 14695981039346656037 // initial hash state
	prime64  uint64 = 1099511628211        // multiplier applied after each byte is mixed in
)
|
||||
|
||||
func fnv(h uint64, x byte) uint64 {
|
||||
h ^= uint64(x)
|
||||
h *= prime64
|
||||
return h
|
||||
}
|
||||
|
||||
func fnvString(h uint64, x string) uint64 {
|
||||
for i := 0; i < len(x); i++ {
|
||||
h ^= uint64(x[i])
|
||||
h *= prime64
|
||||
}
|
||||
return h
|
||||
}
|
||||
|
||||
func fnvUint64(h uint64, x uint64) uint64 {
|
||||
for i := 0; i < 8; i++ {
|
||||
h ^= x & 0xFF
|
||||
x >>= 8
|
||||
h *= prime64
|
||||
}
|
||||
return h
|
||||
}
|
||||
|
||||
func fnvUint32(h uint64, x uint32) uint64 {
|
||||
for i := 0; i < 4; i++ {
|
||||
h ^= uint64(x & 0xFF)
|
||||
x >>= 8
|
||||
h *= prime64
|
||||
}
|
||||
return h
|
||||
}
|
||||
|
||||
// A dedup is a deduplicator for call stacks, so that we only print
// a report for new call stacks, not for call stacks we've already
// reported.
//
// It has two modes: an approximate but lock-free mode that
// may still emit some duplicates, and a precise mode that uses
// a lock and never emits duplicates.
type dedup struct {
	// 128-entry 4-way, lossy cache for seenLossy
	recent [128][4]uint64

	// complete history for seen
	mu sync.Mutex      // held by seen while it reads and updates m
	m  map[uint64]bool // hashes already reported; created by seen on first use
}
|
||||
|
||||
// seen records that h has now been seen and reports whether it was seen before.
|
||||
// When seen returns false, the caller is expected to print a report for h.
|
||||
func (d *dedup) seen(h uint64) bool {
|
||||
d.mu.Lock()
|
||||
if d.m == nil {
|
||||
d.m = make(map[uint64]bool)
|
||||
}
|
||||
seen := d.m[h]
|
||||
d.m[h] = true
|
||||
d.mu.Unlock()
|
||||
return seen
|
||||
}
|
||||
|
||||
// seenLossy is a variant of seen that avoids a lock by using a cache of recently seen hashes.
|
||||
// Each cache entry is N-way set-associative: h can appear in any of the slots.
|
||||
// If h does not appear in any of them, then it is inserted into a random slot,
|
||||
// overwriting whatever was there before.
|
||||
func (d *dedup) seenLossy(h uint64) bool {
|
||||
cache := &d.recent[uint(h)%uint(len(d.recent))]
|
||||
for i := 0; i < len(cache); i++ {
|
||||
if atomic.LoadUint64(&cache[i]) == h {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
// Compute index in set to evict as hash of current set.
|
||||
ch := offset64
|
||||
for _, x := range cache {
|
||||
ch = fnvUint64(ch, x)
|
||||
}
|
||||
atomic.StoreUint64(&cache[uint(ch)%uint(len(cache))], h)
|
||||
return false
|
||||
}
|
||||
414
src/internal/buildcfg/cfg.go
Normal file
414
src/internal/buildcfg/cfg.go
Normal file
@@ -0,0 +1,414 @@
|
||||
// Copyright 2021 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package buildcfg provides access to the build configuration
|
||||
// described by the current environment. It is for use by build tools
|
||||
// such as cmd/go or cmd/compile and for setting up go/build's Default context.
|
||||
//
|
||||
// Note that it does NOT provide access to the build configuration used to
|
||||
// build the currently-running binary. For that, use runtime.GOOS etc
|
||||
// as well as internal/goexperiment.
|
||||
package buildcfg
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Build configuration values, computed when the package is initialized.
//
// GOARCH, GOOS and GO386 are read with envOr: the environment variable of
// the same name is used when it is set and non-empty, and the corresponding
// default* value otherwise. The values produced by helper functions
// (GOAMD64 through GOWASM) are validated by those helpers, which record a
// problem in Error.
var (
	GOROOT    = os.Getenv("GOROOT") // cached for efficiency
	GOARCH    = envOr("GOARCH", defaultGOARCH)
	GOOS      = envOr("GOOS", defaultGOOS)
	GO386     = envOr("GO386", defaultGO386)
	GOAMD64   = goamd64()
	GOARM     = goarm()
	GOARM64   = goarm64()
	GOMIPS    = gomips()
	GOMIPS64  = gomips64()
	GOPPC64   = goppc64()
	GORISCV64 = goriscv64()
	GOWASM    = gowasm()
	ToolTags  = toolTags()
	GO_LDSO   = defaultGO_LDSO
	Version   = version
)
|
||||
|
||||
// Error is one of the errors found (if any) in the build configuration.
// The helpers that validate individual settings assign to it directly,
// so when several settings are invalid only one of the errors is kept.
var Error error
|
||||
|
||||
// Check exits the program with a fatal error if Error is non-nil.
|
||||
func Check() {
|
||||
if Error != nil {
|
||||
fmt.Fprintf(os.Stderr, "%s: %v\n", filepath.Base(os.Args[0]), Error)
|
||||
os.Exit(2)
|
||||
}
|
||||
}
|
||||
|
||||
// envOr returns the value of the environment variable key,
// or value when the variable is unset or empty.
func envOr(key, value string) string {
	x := os.Getenv(key)
	if x == "" {
		return value
	}
	return x
}
|
||||
|
||||
func goamd64() int {
|
||||
switch v := envOr("GOAMD64", defaultGOAMD64); v {
|
||||
case "v1":
|
||||
return 1
|
||||
case "v2":
|
||||
return 2
|
||||
case "v3":
|
||||
return 3
|
||||
case "v4":
|
||||
return 4
|
||||
}
|
||||
Error = fmt.Errorf("invalid GOAMD64: must be v1, v2, v3, v4")
|
||||
return int(defaultGOAMD64[len("v")] - '0')
|
||||
}
|
||||
|
||||
// goarmFeatures is the parsed form of a GOARM setting.
type goarmFeatures struct {
	Version   int  // 5, 6 or 7
	SoftFloat bool // whether the softfloat option is in effect
}

// String returns the setting in "version,floatmode" form,
// for example "7,hardfloat" or "5,softfloat".
func (g goarmFeatures) String() string {
	mode := ",hardfloat"
	if g.SoftFloat {
		mode = ",softfloat"
	}
	return strconv.Itoa(g.Version) + mode
}
|
||||
|
||||
func goarm() (g goarmFeatures) {
|
||||
const (
|
||||
softFloatOpt = ",softfloat"
|
||||
hardFloatOpt = ",hardfloat"
|
||||
)
|
||||
def := defaultGOARM
|
||||
if GOOS == "android" && GOARCH == "arm" {
|
||||
// Android arm devices always support GOARM=7.
|
||||
def = "7"
|
||||
}
|
||||
v := envOr("GOARM", def)
|
||||
|
||||
floatSpecified := false
|
||||
if strings.HasSuffix(v, softFloatOpt) {
|
||||
g.SoftFloat = true
|
||||
floatSpecified = true
|
||||
v = v[:len(v)-len(softFloatOpt)]
|
||||
}
|
||||
if strings.HasSuffix(v, hardFloatOpt) {
|
||||
floatSpecified = true
|
||||
v = v[:len(v)-len(hardFloatOpt)]
|
||||
}
|
||||
|
||||
switch v {
|
||||
case "5":
|
||||
g.Version = 5
|
||||
case "6":
|
||||
g.Version = 6
|
||||
case "7":
|
||||
g.Version = 7
|
||||
default:
|
||||
Error = fmt.Errorf("invalid GOARM: must start with 5, 6, or 7, and may optionally end in either %q or %q", hardFloatOpt, softFloatOpt)
|
||||
g.Version = int(def[0] - '0')
|
||||
}
|
||||
|
||||
// 5 defaults to softfloat. 6 and 7 default to hardfloat.
|
||||
if !floatSpecified && g.Version == 5 {
|
||||
g.SoftFloat = true
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// Goarm64Features is the parsed form of a GOARM64 setting:
// an architecture version plus optional extensions.
type Goarm64Features struct {
	// Version is the architecture version, such as "v8.0".
	Version string
	// Large Systems Extension
	LSE bool
	// ARM v8.0 Cryptographic Extension. It includes the following features:
	// * FEAT_AES, which includes the AESD and AESE instructions.
	// * FEAT_PMULL, which includes the PMULL, PMULL2 instructions.
	// * FEAT_SHA1, which includes the SHA1* instructions.
	// * FEAT_SHA256, which includes the SHA256* instructions.
	Crypto bool
}

// String returns the setting as the version followed by ",lse" and/or
// ",crypto" for each enabled extension, in that order.
func (g Goarm64Features) String() string {
	var sb strings.Builder
	sb.WriteString(g.Version)
	if g.LSE {
		sb.WriteString(",lse")
	}
	if g.Crypto {
		sb.WriteString(",crypto")
	}
	return sb.String()
}
|
||||
|
||||
func ParseGoarm64(v string) (g Goarm64Features, e error) {
|
||||
const (
|
||||
lseOpt = ",lse"
|
||||
cryptoOpt = ",crypto"
|
||||
)
|
||||
|
||||
g.LSE = false
|
||||
g.Crypto = false
|
||||
// We allow any combination of suffixes, in any order
|
||||
for {
|
||||
if strings.HasSuffix(v, lseOpt) {
|
||||
g.LSE = true
|
||||
v = v[:len(v)-len(lseOpt)]
|
||||
continue
|
||||
}
|
||||
|
||||
if strings.HasSuffix(v, cryptoOpt) {
|
||||
g.Crypto = true
|
||||
v = v[:len(v)-len(cryptoOpt)]
|
||||
continue
|
||||
}
|
||||
|
||||
break
|
||||
}
|
||||
|
||||
switch v {
|
||||
case "v8.0":
|
||||
g.Version = v
|
||||
case "v8.1", "v8.2", "v8.3", "v8.4", "v8.5", "v8.6", "v8.7", "v8.8", "v8.9",
|
||||
"v9.0", "v9.1", "v9.2", "v9.3", "v9.4", "v9.5":
|
||||
g.Version = v
|
||||
// LSE extension is mandatory starting from 8.1
|
||||
g.LSE = true
|
||||
default:
|
||||
e = fmt.Errorf("invalid GOARM64: must start with v8.{0-9} or v9.{0-5} and may optionally end in %q and/or %q",
|
||||
lseOpt, cryptoOpt)
|
||||
g.Version = defaultGOARM64
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func goarm64() (g Goarm64Features) {
|
||||
g, Error = ParseGoarm64(envOr("GOARM64", defaultGOARM64))
|
||||
return
|
||||
}
|
||||
|
||||
// Returns true if g supports giving ARM64 ISA
|
||||
// Note that this function doesn't accept / test suffixes (like ",lse" or ",crypto")
|
||||
func (g Goarm64Features) Supports(s string) bool {
|
||||
// We only accept "v{8-9}.{0-9}. Everything else is malformed.
|
||||
if len(s) != 4 {
|
||||
return false
|
||||
}
|
||||
|
||||
major := s[1]
|
||||
minor := s[3]
|
||||
|
||||
// We only accept "v{8-9}.{0-9}. Everything else is malformed.
|
||||
if major < '8' || major > '9' ||
|
||||
minor < '0' || minor > '9' ||
|
||||
s[0] != 'v' || s[2] != '.' {
|
||||
return false
|
||||
}
|
||||
|
||||
g_major := g.Version[1]
|
||||
g_minor := g.Version[3]
|
||||
|
||||
if major == g_major {
|
||||
return minor <= g_minor
|
||||
} else if g_major == '9' {
|
||||
// v9.0 diverged from v8.5. This means we should compare with g_minor increased by five.
|
||||
return minor <= g_minor+5
|
||||
} else {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
func gomips() string {
|
||||
switch v := envOr("GOMIPS", defaultGOMIPS); v {
|
||||
case "hardfloat", "softfloat":
|
||||
return v
|
||||
}
|
||||
Error = fmt.Errorf("invalid GOMIPS: must be hardfloat, softfloat")
|
||||
return defaultGOMIPS
|
||||
}
|
||||
|
||||
func gomips64() string {
|
||||
switch v := envOr("GOMIPS64", defaultGOMIPS64); v {
|
||||
case "hardfloat", "softfloat":
|
||||
return v
|
||||
}
|
||||
Error = fmt.Errorf("invalid GOMIPS64: must be hardfloat, softfloat")
|
||||
return defaultGOMIPS64
|
||||
}
|
||||
|
||||
func goppc64() int {
|
||||
switch v := envOr("GOPPC64", defaultGOPPC64); v {
|
||||
case "power8":
|
||||
return 8
|
||||
case "power9":
|
||||
return 9
|
||||
case "power10":
|
||||
return 10
|
||||
}
|
||||
Error = fmt.Errorf("invalid GOPPC64: must be power8, power9, power10")
|
||||
return int(defaultGOPPC64[len("power")] - '0')
|
||||
}
|
||||
|
||||
func goriscv64() int {
|
||||
switch v := envOr("GORISCV64", defaultGORISCV64); v {
|
||||
case "rva20u64":
|
||||
return 20
|
||||
case "rva22u64":
|
||||
return 22
|
||||
}
|
||||
Error = fmt.Errorf("invalid GORISCV64: must be rva20u64, rva22u64")
|
||||
v := defaultGORISCV64[len("rva"):]
|
||||
i := strings.IndexFunc(v, func(r rune) bool {
|
||||
return r < '0' || r > '9'
|
||||
})
|
||||
year, _ := strconv.Atoi(v[:i])
|
||||
return year
|
||||
}
|
||||
|
||||
// gowasmFeatures records which optional WebAssembly features are enabled.
type gowasmFeatures struct {
	SatConv bool // reported as "satconv"
	SignExt bool // reported as "signext"
}

// String returns the enabled features as a comma-separated list, always in
// the order "satconv", "signext". It is empty when no feature is enabled.
func (f gowasmFeatures) String() string {
	options := []struct {
		on   bool
		name string
	}{
		{f.SatConv, "satconv"},
		{f.SignExt, "signext"},
	}
	var enabled []string
	for _, o := range options {
		if o.on {
			enabled = append(enabled, o.name)
		}
	}
	return strings.Join(enabled, ",")
}
|
||||
|
||||
func gowasm() (f gowasmFeatures) {
|
||||
for _, opt := range strings.Split(envOr("GOWASM", ""), ",") {
|
||||
switch opt {
|
||||
case "satconv":
|
||||
f.SatConv = true
|
||||
case "signext":
|
||||
f.SignExt = true
|
||||
case "":
|
||||
// ignore
|
||||
default:
|
||||
Error = fmt.Errorf("invalid GOWASM: no such feature %q", opt)
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// Getgoextlinkenabled returns the result of looking up GO_EXTLINK_ENABLED
// through envOr, with defaultGO_EXTLINK_ENABLED passed as the fallback value.
func Getgoextlinkenabled() string {
	return envOr("GO_EXTLINK_ENABLED", defaultGO_EXTLINK_ENABLED)
}
|
||||
|
||||
func toolTags() []string {
|
||||
tags := experimentTags()
|
||||
tags = append(tags, gogoarchTags()...)
|
||||
return tags
|
||||
}
|
||||
|
||||
func experimentTags() []string {
|
||||
var list []string
|
||||
// For each experiment that has been enabled in the toolchain, define a
|
||||
// build tag with the same name but prefixed by "goexperiment." which can be
|
||||
// used for compiling alternative files for the experiment. This allows
|
||||
// changes for the experiment, like extra struct fields in the runtime,
|
||||
// without affecting the base non-experiment code at all.
|
||||
for _, exp := range Experiment.Enabled() {
|
||||
list = append(list, "goexperiment."+exp)
|
||||
}
|
||||
return list
|
||||
}
|
||||
|
||||
// GOGOARCH returns the name and value of the GO$GOARCH setting.
|
||||
// For example, if GOARCH is "amd64" it might return "GOAMD64", "v2".
|
||||
func GOGOARCH() (name, value string) {
|
||||
switch GOARCH {
|
||||
case "386":
|
||||
return "GO386", GO386
|
||||
case "amd64":
|
||||
return "GOAMD64", fmt.Sprintf("v%d", GOAMD64)
|
||||
case "arm":
|
||||
return "GOARM", GOARM.String()
|
||||
case "arm64":
|
||||
return "GOARM64", GOARM64.String()
|
||||
case "mips", "mipsle":
|
||||
return "GOMIPS", GOMIPS
|
||||
case "mips64", "mips64le":
|
||||
return "GOMIPS64", GOMIPS64
|
||||
case "ppc64", "ppc64le":
|
||||
return "GOPPC64", fmt.Sprintf("power%d", GOPPC64)
|
||||
case "wasm":
|
||||
return "GOWASM", GOWASM.String()
|
||||
}
|
||||
return "", ""
|
||||
}
|
||||
|
||||
func gogoarchTags() []string {
|
||||
switch GOARCH {
|
||||
case "386":
|
||||
return []string{GOARCH + "." + GO386}
|
||||
case "amd64":
|
||||
var list []string
|
||||
for i := 1; i <= GOAMD64; i++ {
|
||||
list = append(list, fmt.Sprintf("%s.v%d", GOARCH, i))
|
||||
}
|
||||
return list
|
||||
case "arm":
|
||||
var list []string
|
||||
for i := 5; i <= GOARM.Version; i++ {
|
||||
list = append(list, fmt.Sprintf("%s.%d", GOARCH, i))
|
||||
}
|
||||
return list
|
||||
case "arm64":
|
||||
var list []string
|
||||
major := int(GOARM64.Version[1] - '0')
|
||||
minor := int(GOARM64.Version[3] - '0')
|
||||
for i := 0; i <= minor; i++ {
|
||||
list = append(list, fmt.Sprintf("%s.v%d.%d", GOARCH, major, i))
|
||||
}
|
||||
// ARM64 v9.x also includes support of v8.x+5 (i.e. v9.1 includes v8.(1+5) = v8.6).
|
||||
if major == 9 {
|
||||
for i := 0; i <= minor+5 && i <= 9; i++ {
|
||||
list = append(list, fmt.Sprintf("%s.v%d.%d", GOARCH, 8, i))
|
||||
}
|
||||
}
|
||||
return list
|
||||
case "mips", "mipsle":
|
||||
return []string{GOARCH + "." + GOMIPS}
|
||||
case "mips64", "mips64le":
|
||||
return []string{GOARCH + "." + GOMIPS64}
|
||||
case "ppc64", "ppc64le":
|
||||
var list []string
|
||||
for i := 8; i <= GOPPC64; i++ {
|
||||
list = append(list, fmt.Sprintf("%s.power%d", GOARCH, i))
|
||||
}
|
||||
return list
|
||||
case "riscv64":
|
||||
list := []string{GOARCH + "." + "rva20u64"}
|
||||
if GORISCV64 >= 22 {
|
||||
list = append(list, GOARCH+"."+"rva22u64")
|
||||
}
|
||||
return list
|
||||
case "wasm":
|
||||
var list []string
|
||||
if GOWASM.SatConv {
|
||||
list = append(list, GOARCH+".satconv")
|
||||
}
|
||||
if GOWASM.SignExt {
|
||||
list = append(list, GOARCH+".signext")
|
||||
}
|
||||
return list
|
||||
}
|
||||
return nil
|
||||
}
|
||||
125
src/internal/buildcfg/cfg_test.go
Normal file
125
src/internal/buildcfg/cfg_test.go
Normal file
@@ -0,0 +1,125 @@
|
||||
// Copyright 2021 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package buildcfg
|
||||
|
||||
import (
|
||||
"os"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestConfigFlags(t *testing.T) {
|
||||
os.Setenv("GOAMD64", "v1")
|
||||
if goamd64() != 1 {
|
||||
t.Errorf("Wrong parsing of GOAMD64=v1")
|
||||
}
|
||||
os.Setenv("GOAMD64", "v4")
|
||||
if goamd64() != 4 {
|
||||
t.Errorf("Wrong parsing of GOAMD64=v4")
|
||||
}
|
||||
Error = nil
|
||||
os.Setenv("GOAMD64", "1")
|
||||
if goamd64(); Error == nil {
|
||||
t.Errorf("Wrong parsing of GOAMD64=1")
|
||||
}
|
||||
|
||||
os.Setenv("GORISCV64", "rva20u64")
|
||||
if goriscv64() != 20 {
|
||||
t.Errorf("Wrong parsing of RISCV64=rva20u64")
|
||||
}
|
||||
os.Setenv("GORISCV64", "rva22u64")
|
||||
if goriscv64() != 22 {
|
||||
t.Errorf("Wrong parsing of RISCV64=rva22u64")
|
||||
}
|
||||
Error = nil
|
||||
os.Setenv("GORISCV64", "rva22")
|
||||
if _ = goriscv64(); Error == nil {
|
||||
t.Errorf("Wrong parsing of RISCV64=rva22")
|
||||
}
|
||||
Error = nil
|
||||
os.Setenv("GOARM64", "v7.0")
|
||||
if _ = goarm64(); Error == nil {
|
||||
t.Errorf("Wrong parsing of GOARM64=7.0")
|
||||
}
|
||||
Error = nil
|
||||
os.Setenv("GOARM64", "8.0")
|
||||
if _ = goarm64(); Error == nil {
|
||||
t.Errorf("Wrong parsing of GOARM64=8.0")
|
||||
}
|
||||
Error = nil
|
||||
os.Setenv("GOARM64", "v8.0,lsb")
|
||||
if _ = goarm64(); Error == nil {
|
||||
t.Errorf("Wrong parsing of GOARM64=v8.0,lsb")
|
||||
}
|
||||
os.Setenv("GOARM64", "v8.0,lse")
|
||||
if goarm64().Version != "v8.0" || goarm64().LSE != true || goarm64().Crypto != false {
|
||||
t.Errorf("Wrong parsing of GOARM64=v8.0,lse")
|
||||
}
|
||||
os.Setenv("GOARM64", "v8.0,crypto")
|
||||
if goarm64().Version != "v8.0" || goarm64().LSE != false || goarm64().Crypto != true {
|
||||
t.Errorf("Wrong parsing of GOARM64=v8.0,crypto")
|
||||
}
|
||||
os.Setenv("GOARM64", "v8.0,crypto,lse")
|
||||
if goarm64().Version != "v8.0" || goarm64().LSE != true || goarm64().Crypto != true {
|
||||
t.Errorf("Wrong parsing of GOARM64=v8.0,crypto,lse")
|
||||
}
|
||||
os.Setenv("GOARM64", "v8.0,lse,crypto")
|
||||
if goarm64().Version != "v8.0" || goarm64().LSE != true || goarm64().Crypto != true {
|
||||
t.Errorf("Wrong parsing of GOARM64=v8.0,lse,crypto")
|
||||
}
|
||||
os.Setenv("GOARM64", "v9.0")
|
||||
if goarm64().Version != "v9.0" || goarm64().LSE != true || goarm64().Crypto != false {
|
||||
t.Errorf("Wrong parsing of GOARM64=v9.0")
|
||||
}
|
||||
}
|
||||
|
||||
func TestGoarm64FeaturesSupports(t *testing.T) {
|
||||
g, _ := ParseGoarm64("v9.3")
|
||||
|
||||
if !g.Supports("v9.3") {
|
||||
t.Errorf("Wrong goarm64Features.Supports for v9.3, v9.3")
|
||||
}
|
||||
|
||||
if g.Supports("v9.4") {
|
||||
t.Errorf("Wrong goarm64Features.Supports for v9.3, v9.4")
|
||||
}
|
||||
|
||||
if !g.Supports("v8.8") {
|
||||
t.Errorf("Wrong goarm64Features.Supports for v9.3, v8.8")
|
||||
}
|
||||
|
||||
if g.Supports("v8.9") {
|
||||
t.Errorf("Wrong goarm64Features.Supports for v9.3, v8.9")
|
||||
}
|
||||
|
||||
if g.Supports(",lse") {
|
||||
t.Errorf("Wrong goarm64Features.Supports for v9.3, ,lse")
|
||||
}
|
||||
}
|
||||
|
||||
func TestGogoarchTags(t *testing.T) {
|
||||
old_goarch := GOARCH
|
||||
old_goarm64 := GOARM64
|
||||
|
||||
GOARCH = "arm64"
|
||||
|
||||
os.Setenv("GOARM64", "v9.5")
|
||||
GOARM64 = goarm64()
|
||||
tags := gogoarchTags()
|
||||
want := []string{"arm64.v9.0", "arm64.v9.1", "arm64.v9.2", "arm64.v9.3", "arm64.v9.4", "arm64.v9.5",
|
||||
"arm64.v8.0", "arm64.v8.1", "arm64.v8.2", "arm64.v8.3", "arm64.v8.4", "arm64.v8.5", "arm64.v8.6", "arm64.v8.7", "arm64.v8.8", "arm64.v8.9"}
|
||||
if len(tags) != len(want) {
|
||||
t.Errorf("Wrong number of tags for GOARM64=v9.5")
|
||||
} else {
|
||||
for i, v := range tags {
|
||||
if v != want[i] {
|
||||
t.Error("Wrong tags for GOARM64=v9.5")
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
GOARCH = old_goarch
|
||||
GOARM64 = old_goarm64
|
||||
}
|
||||
190
src/internal/buildcfg/exp.go
Normal file
190
src/internal/buildcfg/exp.go
Normal file
@@ -0,0 +1,190 @@
|
||||
// Copyright 2021 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package buildcfg
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"reflect"
|
||||
"strings"
|
||||
|
||||
"internal/goexperiment"
|
||||
)
|
||||
|
||||
// ExperimentFlags represents a set of GOEXPERIMENT flags relative to a baseline
// (platform-default) experiment configuration.
type ExperimentFlags struct {
	// Flags holds the experiment settings in effect.
	goexperiment.Flags
	// baseline holds the default settings; String reports only the
	// experiments whose state in Flags differs from it.
	baseline goexperiment.Flags
}
|
||||
|
||||
// Experiment contains the toolchain experiments enabled for the
// current build.
//
// (This is not necessarily the set of experiments the compiler itself
// was built with.)
//
// The baseline recorded inside the value specifies the experiment flags
// that are enabled by default in the current toolchain. This is, in effect,
// the "control" configuration and any variation from this is an experiment.
//
// If the GOEXPERIMENT setting cannot be parsed, the error is stored in the
// package-level Error variable and Experiment is the zero ExperimentFlags.
var Experiment ExperimentFlags = func() ExperimentFlags {
	flags, err := ParseGOEXPERIMENT(GOOS, GOARCH, envOr("GOEXPERIMENT", defaultGOEXPERIMENT))
	if err != nil {
		Error = err
		return ExperimentFlags{}
	}
	return *flags
}()
|
||||
|
||||
// DefaultGOEXPERIMENT is the embedded default GOEXPERIMENT string.
// It is not guaranteed to be canonical.
const DefaultGOEXPERIMENT = defaultGOEXPERIMENT

// FramePointerEnabled enables the use of platform conventions for
// saving frame pointers.
//
// This used to be an experiment, but now it's always enabled on
// platforms that support it. As written here, that means it is true
// exactly when GOARCH is "amd64" or "arm64".
//
// Note: must agree with runtime.framepointer_enabled.
var FramePointerEnabled = GOARCH == "amd64" || GOARCH == "arm64"
|
||||
|
||||
// ParseGOEXPERIMENT parses a (GOOS, GOARCH, GOEXPERIMENT)
|
||||
// configuration tuple and returns the enabled and baseline experiment
|
||||
// flag sets.
|
||||
//
|
||||
// TODO(mdempsky): Move to internal/goexperiment.
|
||||
func ParseGOEXPERIMENT(goos, goarch, goexp string) (*ExperimentFlags, error) {
|
||||
// regabiSupported is set to true on platforms where register ABI is
|
||||
// supported and enabled by default.
|
||||
// regabiAlwaysOn is set to true on platforms where register ABI is
|
||||
// always on.
|
||||
var regabiSupported, regabiAlwaysOn bool
|
||||
switch goarch {
|
||||
case "amd64", "arm64", "loong64", "ppc64le", "ppc64", "riscv64":
|
||||
regabiAlwaysOn = true
|
||||
regabiSupported = true
|
||||
}
|
||||
|
||||
baseline := goexperiment.Flags{
|
||||
RegabiWrappers: regabiSupported,
|
||||
RegabiArgs: regabiSupported,
|
||||
CoverageRedesign: true,
|
||||
}
|
||||
|
||||
// Start with the statically enabled set of experiments.
|
||||
flags := &ExperimentFlags{
|
||||
Flags: baseline,
|
||||
baseline: baseline,
|
||||
}
|
||||
|
||||
// Pick up any changes to the baseline configuration from the
|
||||
// GOEXPERIMENT environment. This can be set at make.bash time
|
||||
// and overridden at build time.
|
||||
if goexp != "" {
|
||||
// Create a map of known experiment names.
|
||||
names := make(map[string]func(bool))
|
||||
rv := reflect.ValueOf(&flags.Flags).Elem()
|
||||
rt := rv.Type()
|
||||
for i := 0; i < rt.NumField(); i++ {
|
||||
field := rv.Field(i)
|
||||
names[strings.ToLower(rt.Field(i).Name)] = field.SetBool
|
||||
}
|
||||
|
||||
// "regabi" is an alias for all working regabi
|
||||
// subexperiments, and not an experiment itself. Doing
|
||||
// this as an alias make both "regabi" and "noregabi"
|
||||
// do the right thing.
|
||||
names["regabi"] = func(v bool) {
|
||||
flags.RegabiWrappers = v
|
||||
flags.RegabiArgs = v
|
||||
}
|
||||
|
||||
// Parse names.
|
||||
for _, f := range strings.Split(goexp, ",") {
|
||||
if f == "" {
|
||||
continue
|
||||
}
|
||||
if f == "none" {
|
||||
// GOEXPERIMENT=none disables all experiment flags.
|
||||
// This is used by cmd/dist, which doesn't know how
|
||||
// to build with any experiment flags.
|
||||
flags.Flags = goexperiment.Flags{}
|
||||
continue
|
||||
}
|
||||
val := true
|
||||
if strings.HasPrefix(f, "no") {
|
||||
f, val = f[2:], false
|
||||
}
|
||||
set, ok := names[f]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("unknown GOEXPERIMENT %s", f)
|
||||
}
|
||||
set(val)
|
||||
}
|
||||
}
|
||||
|
||||
if regabiAlwaysOn {
|
||||
flags.RegabiWrappers = true
|
||||
flags.RegabiArgs = true
|
||||
}
|
||||
// regabi is only supported on amd64, arm64, loong64, riscv64, ppc64 and ppc64le.
|
||||
if !regabiSupported {
|
||||
flags.RegabiWrappers = false
|
||||
flags.RegabiArgs = false
|
||||
}
|
||||
// Check regabi dependencies.
|
||||
if flags.RegabiArgs && !flags.RegabiWrappers {
|
||||
return nil, fmt.Errorf("GOEXPERIMENT regabiargs requires regabiwrappers")
|
||||
}
|
||||
return flags, nil
|
||||
}
|
||||
|
||||
// String returns the canonical GOEXPERIMENT string to enable this experiment
// configuration. (Experiments in the same state as in the baseline are elided.)
//
// The result is a comma-separated list in which a disabled experiment is
// written with a "no" prefix.
func (exp *ExperimentFlags) String() string {
	return strings.Join(expList(&exp.Flags, &exp.baseline, false), ",")
}
|
||||
|
||||
// expList returns the list of lower-cased experiment names for
|
||||
// experiments that differ from base. base may be nil to indicate no
|
||||
// experiments. If all is true, then include all experiment flags,
|
||||
// regardless of base.
|
||||
func expList(exp, base *goexperiment.Flags, all bool) []string {
|
||||
var list []string
|
||||
rv := reflect.ValueOf(exp).Elem()
|
||||
var rBase reflect.Value
|
||||
if base != nil {
|
||||
rBase = reflect.ValueOf(base).Elem()
|
||||
}
|
||||
rt := rv.Type()
|
||||
for i := 0; i < rt.NumField(); i++ {
|
||||
name := strings.ToLower(rt.Field(i).Name)
|
||||
val := rv.Field(i).Bool()
|
||||
baseVal := false
|
||||
if base != nil {
|
||||
baseVal = rBase.Field(i).Bool()
|
||||
}
|
||||
if all || val != baseVal {
|
||||
if val {
|
||||
list = append(list, name)
|
||||
} else {
|
||||
list = append(list, "no"+name)
|
||||
}
|
||||
}
|
||||
}
|
||||
return list
|
||||
}
|
||||
|
||||
// Enabled returns a list of enabled experiments, as
// lower-cased experiment names.
//
// It compares against a nil baseline, so exactly the experiments that are
// currently on are listed.
func (exp *ExperimentFlags) Enabled() []string {
	return expList(&exp.Flags, nil, false)
}
|
||||
|
||||
// All returns a list of all experiment settings, as lower-cased
// experiment names.
// Disabled experiments appear in the list prefixed by "no".
func (exp *ExperimentFlags) All() []string {
	return expList(&exp.Flags, nil, true)
}
|
||||
118
src/internal/bytealg/bytealg.go
Normal file
118
src/internal/bytealg/bytealg.go
Normal file
@@ -0,0 +1,118 @@
|
||||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package bytealg
|
||||
|
||||
import (
|
||||
"internal/cpu"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
// Offsets into internal/cpu records for use in assembly.
//
// Each constant is the byte offset of one feature flag inside the
// corresponding internal/cpu variable.
const (
	offsetX86HasSSE42  = unsafe.Offsetof(cpu.X86.HasSSE42)
	offsetX86HasAVX2   = unsafe.Offsetof(cpu.X86.HasAVX2)
	offsetX86HasPOPCNT = unsafe.Offsetof(cpu.X86.HasPOPCNT)

	offsetS390xHasVX = unsafe.Offsetof(cpu.S390X.HasVX)

	// Note: named "Has" like its neighbours, but the field is cpu.PPC64.IsPOWER9.
	offsetPPC64HasPOWER9 = unsafe.Offsetof(cpu.PPC64.IsPOWER9)
)
|
||||
|
||||
// MaxLen is the maximum length of the string to be searched for (argument b) in Index.
// If MaxLen is not 0, make sure MaxLen >= 4.
// It is not assigned in this file.
var MaxLen int

// PrimeRK is the prime base used in Rabin-Karp algorithm.
// Hashes are computed in uint32 arithmetic, so they wrap modulo 2^32.
const PrimeRK = 16777619
|
||||
|
||||
// HashStr returns the hash and the appropriate multiplicative
|
||||
// factor for use in Rabin-Karp algorithm.
|
||||
func HashStr[T string | []byte](sep T) (uint32, uint32) {
|
||||
hash := uint32(0)
|
||||
for i := 0; i < len(sep); i++ {
|
||||
hash = hash*PrimeRK + uint32(sep[i])
|
||||
}
|
||||
var pow, sq uint32 = 1, PrimeRK
|
||||
for i := len(sep); i > 0; i >>= 1 {
|
||||
if i&1 != 0 {
|
||||
pow *= sq
|
||||
}
|
||||
sq *= sq
|
||||
}
|
||||
return hash, pow
|
||||
}
|
||||
|
||||
// HashStrRev returns the hash of the reverse of sep and the
|
||||
// appropriate multiplicative factor for use in Rabin-Karp algorithm.
|
||||
func HashStrRev[T string | []byte](sep T) (uint32, uint32) {
|
||||
hash := uint32(0)
|
||||
for i := len(sep) - 1; i >= 0; i-- {
|
||||
hash = hash*PrimeRK + uint32(sep[i])
|
||||
}
|
||||
var pow, sq uint32 = 1, PrimeRK
|
||||
for i := len(sep); i > 0; i >>= 1 {
|
||||
if i&1 != 0 {
|
||||
pow *= sq
|
||||
}
|
||||
sq *= sq
|
||||
}
|
||||
return hash, pow
|
||||
}
|
||||
|
||||
// IndexRabinKarp uses the Rabin-Karp search algorithm to return the index of the
|
||||
// first occurrence of sep in s, or -1 if not present.
|
||||
func IndexRabinKarp[T string | []byte](s, sep T) int {
|
||||
// Rabin-Karp search
|
||||
hashss, pow := HashStr(sep)
|
||||
n := len(sep)
|
||||
var h uint32
|
||||
for i := 0; i < n; i++ {
|
||||
h = h*PrimeRK + uint32(s[i])
|
||||
}
|
||||
if h == hashss && string(s[:n]) == string(sep) {
|
||||
return 0
|
||||
}
|
||||
for i := n; i < len(s); {
|
||||
h *= PrimeRK
|
||||
h += uint32(s[i])
|
||||
h -= pow * uint32(s[i-n])
|
||||
i++
|
||||
if h == hashss && string(s[i-n:i]) == string(sep) {
|
||||
return i - n
|
||||
}
|
||||
}
|
||||
return -1
|
||||
}
|
||||
|
||||
// LastIndexRabinKarp uses the Rabin-Karp search algorithm to return the last index of the
|
||||
// occurrence of sep in s, or -1 if not present.
|
||||
func LastIndexRabinKarp[T string | []byte](s, sep T) int {
|
||||
// Rabin-Karp search from the end of the string
|
||||
hashss, pow := HashStrRev(sep)
|
||||
n := len(sep)
|
||||
last := len(s) - n
|
||||
var h uint32
|
||||
for i := len(s) - 1; i >= last; i-- {
|
||||
h = h*PrimeRK + uint32(s[i])
|
||||
}
|
||||
if h == hashss && string(s[last:]) == string(sep) {
|
||||
return last
|
||||
}
|
||||
for i := last - 1; i >= 0; i-- {
|
||||
h *= PrimeRK
|
||||
h += uint32(s[i])
|
||||
h -= pow * uint32(s[i+n])
|
||||
if h == hashss && string(s[i:i+n]) == string(sep) {
|
||||
return i
|
||||
}
|
||||
}
|
||||
return -1
|
||||
}
|
||||
|
||||
// MakeNoZero makes a slice of length n and capacity of at least n bytes
// without zeroing the bytes (including the bytes between len and cap).
// It is the caller's responsibility to ensure uninitialized bytes
// do not leak to the end user.
//
// The function is declared here without a body; its implementation is
// provided outside this file.
func MakeNoZero(n int) []byte
|
||||
144
src/internal/bytealg/compare_386.s
Normal file
144
src/internal/bytealg/compare_386.s
Normal file
@@ -0,0 +1,144 @@
|
||||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include "go_asm.h"
|
||||
#include "textflag.h"
|
||||
|
||||
// Compare loads the base pointers and lengths of a and b from the argument
// frame into the registers cmpbody expects, passes the address of the result
// slot in AX, and tail-jumps to cmpbody, which writes the result there.
TEXT ·Compare(SB),NOSPLIT,$0-28
	MOVL	a_base+0(FP), SI
	MOVL	a_len+4(FP), BX
	MOVL	b_base+12(FP), DI
	MOVL	b_len+16(FP), DX
	LEAL	ret+24(FP), AX	// AX = address of the result word
	JMP	cmpbody<>(SB)
|
||||
|
||||
// runtime·cmpstring is the same as Compare but with the two-word argument
// layout (base, len) for each operand: it sets up SI/BX/DI/DX and the result
// address in AX, then tail-jumps to cmpbody.
TEXT runtime·cmpstring(SB),NOSPLIT,$0-20
	MOVL	a_base+0(FP), SI
	MOVL	a_len+4(FP), BX
	MOVL	b_base+8(FP), DI
	MOVL	b_len+12(FP), DX
	LEAL	ret+16(FP), AX	// AX = address of the result word
	JMP	cmpbody<>(SB)
|
||||
|
||||
// cmpbody compares the first min(alen, blen) bytes of a and b and, if they
// are all equal, falls back to comparing the lengths.
//
// input:
//   SI = a
//   DI = b
//   BX = alen
//   DX = blen
//   AX = address of return word (set to 1/0/-1)
TEXT cmpbody<>(SB),NOSPLIT,$0-0
	MOVL	DX, BP
	SUBL	BX, DX // DX = blen-alen
	JLE	2(PC)	// blen <= alen: keep BP = blen, skip the next instruction
	MOVL	BX, BP // BP = min(alen, blen)
	CMPL	SI, DI
	JEQ	allsame	// same pointer: only the lengths can differ
	CMPL	BP, $4
	JB	small
#ifdef GO386_softfloat
	// Skip largeloop, which uses the X0/X1 vector registers.
	JMP	mediumloop
#endif
	// Compare 16 bytes per iteration.
largeloop:
	CMPL	BP, $16
	JB	mediumloop
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	PCMPEQB	X0, X1
	PMOVMSKB X1, BX
	XORL	$0xffff, BX	// convert EQ to NE
	JNE	diff16	// branch if at least one byte is not equal
	ADDL	$16, SI
	ADDL	$16, DI
	SUBL	$16, BP
	JMP	largeloop

	// BX = bit mask of differing bytes within the 16-byte chunk at SI/DI.
diff16:
	BSFL	BX, BX	// index of first byte that differs
	XORL	DX, DX
	MOVB	(SI)(BX*1), CX
	CMPB	CX, (DI)(BX*1)
	SETHI	DX
	LEAL	-1(DX*2), DX	// convert 1/0 to +1/-1
	MOVL	DX, (AX)
	RET

	// Compare 4 bytes per iteration.
mediumloop:
	CMPL	BP, $4
	JBE	_0through4
	MOVL	(SI), BX
	MOVL	(DI), CX
	CMPL	BX, CX
	JNE	diff4
	ADDL	$4, SI
	ADDL	$4, DI
	SUBL	$4, BP
	JMP	mediumloop

	// At most 4 bytes remain: compare the 4 bytes that end at the last
	// remaining byte of each operand.
_0through4:
	MOVL	-4(SI)(BP*1), BX
	MOVL	-4(DI)(BP*1), CX
	CMPL	BX, CX
	JEQ	allsame

	// BX and CX hold 4-byte words of a and b that differ.
diff4:
	BSWAPL	BX	// reverse order of bytes
	BSWAPL	CX
	XORL	BX, CX	// find bit differences
	BSRL	CX, CX	// index of highest bit difference
	SHRL	CX, BX	// move a's bit to bottom
	ANDL	$1, BX	// mask bit
	LEAL	-1(BX*2), BX	// 1/0 => +1/-1
	MOVL	BX, (AX)
	RET

	// 0-3 bytes in common
small:
	LEAL	(BP*8), CX	// bytes to compare -> bits to compare
	NEGL	CX	// shift count (32 - bits mod 32); zero when BP == 0
	JEQ	allsame

	// load si
	// If the low byte of the address is above 0xfc, load the 4 bytes that end
	// at the last byte instead of the 4 bytes that start here.
	// NOTE(review): presumably avoids reading across a page boundary - confirm.
	CMPB	SI, $0xfc
	JA	si_high
	MOVL	(SI), SI
	JMP	si_finish
si_high:
	MOVL	-4(SI)(BP*1), SI
	SHRL	CX, SI
si_finish:
	SHLL	CX, SI	// keep only the BP wanted bytes, in the high bytes

	// same for di
	CMPB	DI, $0xfc
	JA	di_high
	MOVL	(DI), DI
	JMP	di_finish
di_high:
	MOVL	-4(DI)(BP*1), DI
	SHRL	CX, DI
di_finish:
	SHLL	CX, DI

	BSWAPL	SI	// reverse order of bytes
	BSWAPL	DI
	XORL	SI, DI	// find bit differences
	JEQ	allsame
	BSRL	DI, CX	// index of highest bit difference
	SHRL	CX, SI	// move a's bit to bottom
	ANDL	$1, SI	// mask bit
	LEAL	-1(SI*2), BX	// 1/0 => +1/-1
	MOVL	BX, (AX)
	RET

	// all the bytes in common are the same, so we just need
	// to compare the lengths.
	// DX still holds blen-alen from the top of the function.
allsame:
	XORL	BX, BX
	XORL	CX, CX
	TESTL	DX, DX
	SETLT	BX	// 1 if alen > blen
	SETEQ	CX	// 1 if alen == blen
	LEAL	-1(CX)(BX*2), BX	// 1,0,-1 result
	MOVL	BX, (AX)
	RET
|
||||
237
src/internal/bytealg/compare_amd64.s
Normal file
237
src/internal/bytealg/compare_amd64.s
Normal file
@@ -0,0 +1,237 @@
|
||||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include "go_asm.h"
|
||||
#include "asm_amd64.h"
|
||||
#include "textflag.h"
|
||||
|
||||
// Compare moves its register arguments into the registers cmpbody expects
// and tail-jumps to cmpbody, which leaves the result in AX.
TEXT ·Compare<ABIInternal>(SB),NOSPLIT,$0-56
	// AX = a_base (want in SI)
	// BX = a_len  (want in BX)
	// CX = a_cap  (unused)
	// DI = b_base (want in DI)
	// SI = b_len  (want in DX)
	// R8 = b_cap  (unused)
	MOVQ	SI, DX	// move b_len out of SI before SI is overwritten
	MOVQ	AX, SI
	JMP	cmpbody<>(SB)
|
||||
|
||||
// runtime·cmpstring moves its register arguments into the registers cmpbody
// expects and tail-jumps to cmpbody, which leaves the result in AX.
TEXT runtime·cmpstring<ABIInternal>(SB),NOSPLIT,$0-40
	// AX = a_base (want in SI)
	// BX = a_len  (want in BX)
	// CX = b_base (want in DI)
	// DI = b_len  (want in DX)
	MOVQ	AX, SI
	MOVQ	DI, DX	// move b_len out of DI before DI is overwritten
	MOVQ	CX, DI
	JMP	cmpbody<>(SB)
|
||||
|
||||
// cmpbody compares the first min(alen, blen) bytes of a and b and, if they
// are all equal, falls back to comparing the lengths.
//
// input:
//   SI = a
//   DI = b
//   BX = alen
//   DX = blen
// output:
//   AX = output (-1/0/1)
TEXT cmpbody<>(SB),NOSPLIT,$0-0
	CMPQ	SI, DI
	JEQ	allsame	// same pointer: only the lengths can differ
	CMPQ	BX, DX
	MOVQ	DX, R8
	CMOVQLT	BX, R8 // R8 = min(alen, blen) = # of bytes to compare
	CMPQ	R8, $8
	JB	small

	CMPQ	R8, $63
	JBE	loop
	// 64 bytes or more: use a 64-bytes-per-iteration loop. When hasAVX2 is
	// not defined at build time, pick the loop from the run-time CPU flag.
#ifndef hasAVX2
	CMPB	internal∕cpu·X86+const_offsetX86HasAVX2(SB), $1
	JEQ	big_loop_avx2
	JMP	big_loop
#else
	JMP	big_loop_avx2
#endif
	// Compare 16 bytes per iteration.
loop:
	CMPQ	R8, $16
	JBE	_0through16
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	PCMPEQB	X0, X1
	PMOVMSKB X1, AX
	XORQ	$0xffff, AX	// convert EQ to NE
	JNE	diff16	// branch if at least one byte is not equal
	ADDQ	$16, SI
	ADDQ	$16, DI
	SUBQ	$16, R8
	JMP	loop

	// diff64/diff48/diff32 advance SI and DI to the 16-byte chunk the mask in
	// AX refers to, then continue (by jump or fallthrough) at diff16.
diff64:
	ADDQ	$48, SI
	ADDQ	$48, DI
	JMP	diff16
diff48:
	ADDQ	$32, SI
	ADDQ	$32, DI
	JMP	diff16
diff32:
	ADDQ	$16, SI
	ADDQ	$16, DI
	// AX = bit mask of differences
diff16:
	BSFQ	AX, BX	// index of first byte that differs
	XORQ	AX, AX
	MOVB	(SI)(BX*1), CX
	CMPB	CX, (DI)(BX*1)
	SETHI	AX
	LEAQ	-1(AX*2), AX	// convert 1/0 to +1/-1
	RET

	// 0 through 16 bytes left, alen>=8, blen>=8
_0through16:
	CMPQ	R8, $8
	JBE	_0through8
	MOVQ	(SI), AX
	MOVQ	(DI), CX
	CMPQ	AX, CX
	JNE	diff8
	// Compare the 8 bytes that end at the last remaining byte of each operand.
_0through8:
	MOVQ	-8(SI)(R8*1), AX
	MOVQ	-8(DI)(R8*1), CX
	CMPQ	AX, CX
	JEQ	allsame

	// AX and CX contain parts of a and b that differ.
diff8:
	BSWAPQ	AX	// reverse order of bytes
	BSWAPQ	CX
	XORQ	AX, CX
	BSRQ	CX, CX	// index of highest bit difference
	SHRQ	CX, AX	// move a's bit to bottom
	ANDQ	$1, AX	// mask bit
	LEAQ	-1(AX*2), AX	// 1/0 => +1/-1
	RET

	// 0-7 bytes in common
small:
	LEAQ	(R8*8), CX	// bytes left -> bits left
	NEGQ	CX		//  - bits left (== 64 - bits left mod 64)
	JEQ	allsame

	// load bytes of a into high bytes of SI
	// If the low byte of the address is above 0xf8, load the 8 bytes that end
	// at the last byte instead of the 8 bytes that start here.
	// NOTE(review): presumably avoids reading across a page boundary - confirm.
	CMPB	SI, $0xf8
	JA	si_high
	MOVQ	(SI), SI
	JMP	si_finish
si_high:
	MOVQ	-8(SI)(R8*1), SI
	SHRQ	CX, SI
si_finish:
	SHLQ	CX, SI

	// load bytes of b into high bytes of DI
	CMPB	DI, $0xf8
	JA	di_high
	MOVQ	(DI), DI
	JMP	di_finish
di_high:
	MOVQ	-8(DI)(R8*1), DI
	SHRQ	CX, DI
di_finish:
	SHLQ	CX, DI

	BSWAPQ	SI	// reverse order of bytes
	BSWAPQ	DI
	XORQ	SI, DI	// find bit differences
	JEQ	allsame
	BSRQ	DI, CX	// index of highest bit difference
	SHRQ	CX, SI	// move a's bit to bottom
	ANDQ	$1, SI	// mask bit
	LEAQ	-1(SI*2), AX // 1/0 => +1/-1
	RET

	// All compared bytes are equal (or the pointers are identical), so the
	// result depends only on the lengths still held in BX and DX.
allsame:
	XORQ	AX, AX
	XORQ	CX, CX
	CMPQ	BX, DX
	SETGT	AX	// 1 if alen > blen
	SETEQ	CX	// 1 if alen == blen
	LEAQ	-1(CX)(AX*2), AX	// 1,0,-1 result
	RET

	// this works for >= 64 bytes of data.
	// Four 16-byte compares per iteration; on a mismatch the diffNN labels
	// adjust SI/DI for the chunk's offset before diff16 reads the bytes.
#ifndef hasAVX2
big_loop:
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	PCMPEQB	X0, X1
	PMOVMSKB X1, AX
	XORQ	$0xffff, AX
	JNE	diff16

	MOVOU	16(SI), X0
	MOVOU	16(DI), X1
	PCMPEQB	X0, X1
	PMOVMSKB X1, AX
	XORQ	$0xffff, AX
	JNE	diff32

	MOVOU	32(SI), X0
	MOVOU	32(DI), X1
	PCMPEQB	X0, X1
	PMOVMSKB X1, AX
	XORQ	$0xffff, AX
	JNE	diff48

	MOVOU	48(SI), X0
	MOVOU	48(DI), X1
	PCMPEQB	X0, X1
	PMOVMSKB X1, AX
	XORQ	$0xffff, AX
	JNE	diff64

	ADDQ	$64, SI
	ADDQ	$64, DI
	SUBQ	$64, R8
	CMPQ	R8, $64
	JBE	loop
	JMP	big_loop
#endif

	// Compare 64-bytes per loop iteration.
	// Loop is unrolled and uses AVX2.
big_loop_avx2:
	VMOVDQU	(SI), Y2
	VMOVDQU	(DI), Y3
	VMOVDQU	32(SI), Y4
	VMOVDQU	32(DI), Y5
	VPCMPEQB Y2, Y3, Y0
	VPMOVMSKB Y0, AX
	XORL	$0xffffffff, AX	// convert EQ to NE for the first 32 bytes
	JNE	diff32_avx2
	VPCMPEQB Y4, Y5, Y6
	VPMOVMSKB Y6, AX
	XORL	$0xffffffff, AX	// convert EQ to NE for the last 32 bytes
	JNE	diff64_avx2

	ADDQ	$64, SI
	ADDQ	$64, DI
	SUBQ	$64, R8
	CMPQ	R8, $64
	JB	big_loop_avx2_exit
	JMP	big_loop_avx2

	// Avoid AVX->SSE transition penalty and search first 32 bytes of 64 byte chunk.
diff32_avx2:
	VZEROUPPER
	JMP	diff16

	// Same as diff32_avx2, but for last 32 bytes.
diff64_avx2:
	VZEROUPPER
	JMP	diff48

	// For <64 bytes remainder jump to normal loop.
big_loop_avx2_exit:
	VZEROUPPER
	JMP	loop
|
||||
86
src/internal/bytealg/compare_arm.s
Normal file
86
src/internal/bytealg/compare_arm.s
Normal file
@@ -0,0 +1,86 @@
|
||||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include "go_asm.h"
|
||||
#include "textflag.h"
|
||||
|
||||
// Compare loads the base pointers and lengths of a and b from the argument
// frame into the registers cmpbody expects, computes R13+28 into R7 as the
// address cmpbody writes its result to, and branches to cmpbody.
TEXT ·Compare(SB),NOSPLIT|NOFRAME,$0-28
	MOVW	a_base+0(FP), R2
	MOVW	a_len+4(FP), R0
	MOVW	b_base+12(FP), R3
	MOVW	b_len+16(FP), R1
	ADD	$28, R13, R7
	B	cmpbody<>(SB)
|
||||
|
||||
// runtime·cmpstring is the same as Compare but with the two-word argument
// layout (base, len) for each operand; the result address passed in R7 is
// R13+20.
TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-20
	MOVW	a_base+0(FP), R2
	MOVW	a_len+4(FP), R0
	MOVW	b_base+8(FP), R3
	MOVW	b_len+12(FP), R1
	ADD	$20, R13, R7
	B	cmpbody<>(SB)
|
||||
|
||||
// cmpbody compares the first min(len(a), len(b)) bytes of a and b and, if
// they are all equal, falls back to comparing the lengths.
//
// On entry:
// R0 is the length of a
// R1 is the length of b
// R2 points to the start of a
// R3 points to the start of b
// R7 points to return value (-1/0/1 will be written here)
//
// On exit:
// R4, R5, R6 and R8 are clobbered
TEXT cmpbody<>(SB),NOSPLIT|NOFRAME,$0-0
	CMP	R2, R3
	BEQ	samebytes	// same pointer: only the lengths can differ
	CMP	R0, R1
	MOVW	R0, R6
	MOVW.LT	R1, R6	// R6 is min(R0, R1)

	CMP	$0, R6
	BEQ	samebytes
	CMP	$4, R6
	ADD	R2, R6	// R2 is current byte in a, R6 is the end of the range to compare
	BLT	byte_loop	// length < 4
	AND	$3, R2, R8
	CMP	$0, R8
	BNE	byte_loop	// unaligned a, use byte-wise compare (TODO: try to align a)
aligned_a:
	AND	$3, R3, R8
	CMP	$0, R8
	BNE	byte_loop	// unaligned b, use byte-wise compare
	AND	$0xfffffffc, R6, R8	// R8 = end address rounded down to a multiple of 4
	// length >= 4
chunk4_loop:
	MOVW.P	4(R2), R4
	MOVW.P	4(R3), R5
	CMP	R4, R5
	BNE	cmp
	CMP	R2, R8
	BNE	chunk4_loop
	CMP	R2, R6
	BEQ	samebytes	// all compared bytes were the same; compare lengths
byte_loop:
	MOVBU.P	1(R2), R4
	MOVBU.P	1(R3), R5
	CMP	R4, R5
	BNE	ret
	CMP	R2, R6
	BNE	byte_loop
samebytes:
	CMP	R0, R1
	MOVW.LT	$1, R0
	MOVW.GT	$-1, R0
	MOVW.EQ	$0, R0
	MOVW	R0, (R7)
	RET
ret:
	// bytes differed
	// The condition flags still come from the CMP R4, R5 in byte_loop.
	MOVW.LT	$1, R0
	MOVW.GT	$-1, R0
	MOVW	R0, (R7)
	RET
cmp:
	// A 4-byte chunk differed: step both pointers back over it and let
	// byte_loop find the first differing byte.
	SUB	$4, R2, R2
	SUB	$4, R3, R3
	B	byte_loop
|
||||
125
src/internal/bytealg/compare_arm64.s
Normal file
125
src/internal/bytealg/compare_arm64.s
Normal file
@@ -0,0 +1,125 @@
|
||||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include "go_asm.h"
|
||||
#include "textflag.h"
|
||||
|
||||
// Compare(a, b []byte) int entry point (arguments arrive in registers).
// It moves b's base and length down into R2/R3, the registers cmpbody
// expects, and branches to cmpbody, which leaves the result in R0.
TEXT ·Compare<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-56
|
||||
// R0 = a_base (want in R0)
|
||||
// R1 = a_len (want in R1)
|
||||
// R2 = a_cap (unused)
|
||||
// R3 = b_base (want in R2)
|
||||
// R4 = b_len (want in R3)
|
||||
// R5 = b_cap (unused)
|
||||
MOVD R3, R2 // b_base -> R2 (overwrites the unused a_cap); must precede the next move, which overwrites R3
|
||||
MOVD R4, R3 // b_len -> R3
|
||||
B cmpbody<>(SB)
|
||||
|
||||
// cmpstring(a, b string) int entry point (arguments arrive in registers).
// The incoming registers already match what cmpbody expects, so this only
// branches to cmpbody, which leaves the result in R0.
TEXT runtime·cmpstring<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-40
|
||||
// R0 = a_base
|
||||
// R1 = a_len
|
||||
// R2 = b_base
|
||||
// R3 = b_len
|
||||
B cmpbody<>(SB)
|
||||
|
||||
// On entry:
|
||||
// R0 points to the start of a
|
||||
// R1 is the length of a
|
||||
// R2 points to the start of b
|
||||
// R3 is the length of b
|
||||
//
|
||||
// On exit:
|
||||
// R0 is the result
|
||||
// R4, R5, R6, R8, R9 and R10 are clobbered
|
||||
TEXT cmpbody<>(SB),NOSPLIT|NOFRAME,$0-0
|
||||
CMP R0, R2
|
||||
BEQ samebytes // same starting pointers; compare lengths
|
||||
CMP R1, R3
|
||||
CSEL LT, R3, R1, R6 // R6 is min(R1, R3)
|
||||
|
||||
CBZ R6, samebytes
|
||||
BIC $0xf, R6, R10
|
||||
CBZ R10, small // length < 16
|
||||
ADD R0, R10 // end of chunk16
|
||||
// length >= 16
|
||||
chunk16_loop:
|
||||
LDP.P 16(R0), (R4, R8)
|
||||
LDP.P 16(R2), (R5, R9)
|
||||
CMP R4, R5
|
||||
BNE cmp
|
||||
CMP R8, R9
|
||||
BNE cmpnext
|
||||
CMP R10, R0
|
||||
BNE chunk16_loop
|
||||
AND $0xf, R6, R6
|
||||
CBZ R6, samebytes
|
||||
SUBS $8, R6
|
||||
BLT tail
|
||||
// the tail is at least 8 bytes long
|
||||
MOVD.P 8(R0), R4
|
||||
MOVD.P 8(R2), R5
|
||||
CMP R4, R5
|
||||
BNE cmp
|
||||
SUB $8, R6
|
||||
// compare last 8 bytes
|
||||
tail:
|
||||
MOVD (R0)(R6), R4
|
||||
MOVD (R2)(R6), R5
|
||||
CMP R4, R5
|
||||
BEQ samebytes
|
||||
cmp:
|
||||
REV R4, R4
|
||||
REV R5, R5
|
||||
CMP R4, R5
|
||||
ret:
|
||||
MOVD $1, R0
|
||||
CNEG HI, R0, R0
|
||||
RET
|
||||
small:
|
||||
TBZ $3, R6, lt_8
|
||||
MOVD (R0), R4
|
||||
MOVD (R2), R5
|
||||
CMP R4, R5
|
||||
BNE cmp
|
||||
SUBS $8, R6
|
||||
BEQ samebytes
|
||||
ADD $8, R0
|
||||
ADD $8, R2
|
||||
SUB $8, R6
|
||||
B tail
|
||||
lt_8:
|
||||
TBZ $2, R6, lt_4
|
||||
MOVWU (R0), R4
|
||||
MOVWU (R2), R5
|
||||
CMPW R4, R5
|
||||
BNE cmp
|
||||
SUBS $4, R6
|
||||
BEQ samebytes
|
||||
ADD $4, R0
|
||||
ADD $4, R2
|
||||
lt_4:
|
||||
TBZ $1, R6, lt_2
|
||||
MOVHU (R0), R4
|
||||
MOVHU (R2), R5
|
||||
CMPW R4, R5
|
||||
BNE cmp
|
||||
ADD $2, R0
|
||||
ADD $2, R2
|
||||
lt_2:
|
||||
TBZ $0, R6, samebytes
|
||||
one:
|
||||
MOVBU (R0), R4
|
||||
MOVBU (R2), R5
|
||||
CMPW R4, R5
|
||||
BNE ret
|
||||
samebytes:
|
||||
CMP R3, R1
|
||||
CSET NE, R0
|
||||
CNEG LO, R0, R0
|
||||
RET
|
||||
cmpnext:
|
||||
REV R8, R4
|
||||
REV R9, R5
|
||||
CMP R4, R5
|
||||
B ret
|
||||
76
src/internal/bytealg/compare_generic.go
Normal file
76
src/internal/bytealg/compare_generic.go
Normal file
@@ -0,0 +1,76 @@
|
||||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build !386 && !amd64 && !s390x && !arm && !arm64 && !loong64 && !ppc64 && !ppc64le && !mips && !mipsle && !wasm && !mips64 && !mips64le && !riscv64
|
||||
|
||||
package bytealg
|
||||
|
||||
import _ "unsafe" // for go:linkname
|
||||
|
||||
// Compare orders a and b lexicographically, byte by byte (unsigned).
// It returns -1 if a < b, +1 if a > b, and 0 if they are equal. When one
// slice is a prefix of the other, the shorter slice sorts first.
func Compare(a, b []byte) int {
	n := len(a)
	if len(b) < n {
		n = len(b)
	}
	// The byte scan is pointless when there is nothing to compare or when
	// both slices start at the same address (the shared prefix is then
	// trivially identical); in either case only the lengths matter.
	if n != 0 && &a[0] != &b[0] {
		for i, x := range a[:n] {
			y := b[i]
			switch {
			case x < y:
				return -1
			case x > y:
				return +1
			}
		}
	}
	// Every compared byte matched: the shorter slice is the smaller one.
	switch {
	case len(a) < len(b):
		return -1
	case len(a) > len(b):
		return +1
	}
	return 0
}
|
||||
|
||||
// CompareString compares the strings a and b and returns the result of
// runtime_cmpstring: -1 if a < b, +1 if a > b, and 0 if they are equal.
func CompareString(a, b string) int {
|
||||
return runtime_cmpstring(a, b)
|
||||
}
|
||||
|
||||
// runtime.cmpstring calls are emitted by the compiler.
|
||||
//
|
||||
// runtime.cmpstring should be an internal detail,
|
||||
// but widely used packages access it using linkname.
|
||||
// Notable members of the hall of shame include:
|
||||
// - gitee.com/zhaochuninhefei/gmgo
|
||||
// - github.com/bytedance/gopkg
|
||||
// - github.com/songzhibin97/gkit
|
||||
//
|
||||
// Do not remove or change the type signature.
|
||||
// See go.dev/issue/67401.
|
||||
//
|
||||
//go:linkname runtime_cmpstring runtime.cmpstring
|
||||
func runtime_cmpstring(a, b string) int {
	// Only the prefix common to both strings can be compared bytewise.
	// Indexing a string yields bytes, so the ordering is unsigned.
	n := len(a)
	if n > len(b) {
		n = len(b)
	}
	for i := 0; i < n; i++ {
		if x, y := a[i], b[i]; x != y {
			if x < y {
				return -1
			}
			return +1
		}
	}
	// The common prefix is identical: the shorter string sorts first.
	switch {
	case len(a) < len(b):
		return -1
	case len(a) > len(b):
		return +1
	}
	return 0
}
|
||||
88
src/internal/bytealg/compare_loong64.s
Normal file
88
src/internal/bytealg/compare_loong64.s
Normal file
@@ -0,0 +1,88 @@
|
||||
// Copyright 2022 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include "go_asm.h"
|
||||
#include "textflag.h"
|
||||
|
||||
// Compare(a, b []byte) int entry point (arguments arrive in registers).
// It moves b's base and length down into R6/R7, the registers cmpbody
// expects, and jumps to cmpbody.
TEXT ·Compare<ABIInternal>(SB),NOSPLIT,$0-56
|
||||
// R4 = a_base
|
||||
// R5 = a_len
|
||||
// R6 = a_cap (unused)
|
||||
// R7 = b_base (want in R6)
|
||||
// R8 = b_len (want in R7)
|
||||
// R9 = b_cap (unused)
|
||||
MOVV R7, R6 // b_base -> R6 (overwrites the unused a_cap); must precede the next move, which overwrites R7
|
||||
MOVV R8, R7 // b_len -> R7
|
||||
JMP cmpbody<>(SB)
|
||||
|
||||
// cmpstring(a, b string) int entry point (arguments arrive in registers).
// The incoming registers already match what cmpbody expects, so this only
// jumps to cmpbody.
TEXT runtime·cmpstring<ABIInternal>(SB),NOSPLIT,$0-40
|
||||
// R4 = a_base
|
||||
// R5 = a_len
|
||||
// R6 = b_base
|
||||
// R7 = b_len
|
||||
JMP cmpbody<>(SB)
|
||||
|
||||
// cmpbody is the shared comparison body for Compare and cmpstring.
// It compares the first min(len(a), len(b)) bytes of a and b; if they are
// all equal the result is decided by the lengths. When both pointers are
// 8-byte aligned it compares 16 bytes per iteration, otherwise (and for
// the remainder) it compares byte by byte.
//
// On entry:
|
||||
// R5 length of a
|
||||
// R7 length of b
|
||||
// R4 points to the start of a
|
||||
// R6 points to the start of b
|
||||
// On exit:
// R4 holds the result (-1/0/1)
|
||||
TEXT cmpbody<>(SB),NOSPLIT|NOFRAME,$0
|
||||
BEQ R4, R6, samebytes // same start of a and b
|
||||
|
||||
SGTU R5, R7, R9 // R9 = 1 if len(a) > len(b) (unsigned)
|
||||
BNE R0, R9, r2_lt_r1 // len(b) is the smaller length
|
||||
MOVV R5, R14 // R14 = len(a)
|
||||
JMP entry
|
||||
r2_lt_r1:
|
||||
MOVV R7, R14 // R14 is min(R5, R7)
|
||||
entry:
|
||||
ADDV R4, R14, R12 // R4 start of a, R12 end of the range of a to compare
|
||||
BEQ R4, R12, samebytes // length is 0
|
||||
|
||||
SRLV $4, R14 // R14 is number of chunks
|
||||
BEQ R0, R14, byte_loop // fewer than 16 bytes: byte-wise only
|
||||
|
||||
// make sure both a and b are aligned.
|
||||
OR R4, R6, R15
|
||||
AND $7, R15 // non-zero if either pointer is not 8-byte aligned
|
||||
BNE R0, R15, byte_loop
|
||||
|
||||
PCALIGN $16
|
||||
chunk16_loop:
|
||||
BEQ R0, R14, byte_loop // no whole chunks left: finish byte-wise
|
||||
MOVV (R4), R8
|
||||
MOVV (R6), R9
|
||||
BNE R8, R9, byte_loop // first 8 bytes differ: let byte_loop find the byte
|
||||
MOVV 8(R4), R16
|
||||
MOVV 8(R6), R17
|
||||
ADDV $16, R4
|
||||
ADDV $16, R6
|
||||
SUBVU $1, R14
|
||||
BEQ R16, R17, chunk16_loop
// Second 8 bytes differ: step back over them so byte_loop re-examines
// them one byte at a time.
|
||||
SUBV $8, R4
|
||||
SUBV $8, R6
|
||||
|
||||
byte_loop:
|
||||
BEQ R4, R12, samebytes // reached the end: compare lengths
|
||||
MOVBU (R4), R8
|
||||
ADDVU $1, R4
|
||||
MOVBU (R6), R9
|
||||
ADDVU $1, R6
|
||||
BEQ R8, R9, byte_loop
|
||||
|
||||
byte_cmp:
|
||||
SGTU R8, R9, R4 // R4 = 1 if (R8 > R9)
|
||||
BNE R0, R4, ret // a's byte is larger: result is 1
|
||||
MOVV $-1, R4 // otherwise a's byte is smaller: result is -1
|
||||
JMP ret
|
||||
|
||||
samebytes:
// R4 = (len(a) > len(b)) - (len(b) > len(a)), i.e. 1, 0 or -1.
|
||||
SGTU R5, R7, R8
|
||||
SGTU R7, R5, R9
|
||||
SUBV R9, R8, R4
|
||||
|
||||
ret:
|
||||
RET
|
||||
88
src/internal/bytealg/compare_mips64x.s
Normal file
88
src/internal/bytealg/compare_mips64x.s
Normal file
@@ -0,0 +1,88 @@
|
||||
// Copyright 2019 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build mips64 || mips64le
|
||||
|
||||
#include "go_asm.h"
|
||||
#include "textflag.h"
|
||||
|
||||
// Compare(a, b []byte) int entry point. It loads both slice headers from
// the argument frame into the registers cmpbody expects, points R9 at the
// result slot, and jumps to cmpbody, which stores the result through R9.
TEXT ·Compare(SB),NOSPLIT,$0-56
|
||||
MOVV a_base+0(FP), R3 // R3 = start of a
|
||||
MOVV b_base+24(FP), R4 // R4 = start of b
|
||||
MOVV a_len+8(FP), R1 // R1 = length of a
|
||||
MOVV b_len+32(FP), R2 // R2 = length of b
|
||||
MOVV $ret+48(FP), R9 // R9 = address of the result slot
|
||||
JMP cmpbody<>(SB)
|
||||
|
||||
// cmpstring(a, b string) int entry point. It loads both string headers from
// the argument frame into the registers cmpbody expects, points R9 at the
// result slot, and jumps to cmpbody, which stores the result through R9.
TEXT runtime·cmpstring(SB),NOSPLIT,$0-40
|
||||
MOVV a_base+0(FP), R3 // R3 = start of a
|
||||
MOVV b_base+16(FP), R4 // R4 = start of b
|
||||
MOVV a_len+8(FP), R1 // R1 = length of a
|
||||
MOVV b_len+24(FP), R2 // R2 = length of b
|
||||
MOVV $ret+32(FP), R9 // R9 = address of the result slot
|
||||
JMP cmpbody<>(SB)
|
||||
|
||||
// cmpbody is the shared comparison body for Compare and cmpstring.
// It compares the first min(len(a), len(b)) bytes of a and b; if they are
// all equal the result is decided by the lengths. When both pointers are
// 8-byte aligned it compares 16 bytes per iteration, otherwise (and for
// the remainder) it compares byte by byte. The result is stored through R9.
//
// On entry:
|
||||
// R1 length of a
|
||||
// R2 length of b
|
||||
// R3 points to the start of a
|
||||
// R4 points to the start of b
|
||||
// R9 points to the return value (-1/0/1)
|
||||
TEXT cmpbody<>(SB),NOSPLIT|NOFRAME,$0
|
||||
BEQ R3, R4, samebytes // same start of a and b
|
||||
|
||||
SGTU R1, R2, R7 // R7 = 1 if len(a) > len(b) (unsigned)
|
||||
BNE R0, R7, r2_lt_r1 // len(b) is the smaller length
|
||||
MOVV R1, R10 // R10 = len(a)
|
||||
JMP entry
|
||||
r2_lt_r1:
|
||||
MOVV R2, R10 // R10 is min(R1, R2)
|
||||
entry:
|
||||
ADDV R3, R10, R8 // R3 start of a, R8 end of a
|
||||
BEQ R3, R8, samebytes // length is 0
|
||||
|
||||
SRLV $4, R10 // R10 is number of chunks
|
||||
BEQ R0, R10, byte_loop // fewer than 16 bytes: byte-wise only
|
||||
|
||||
// make sure both a and b are aligned.
|
||||
OR R3, R4, R11
|
||||
AND $7, R11 // non-zero if either pointer is not 8-byte aligned
|
||||
BNE R0, R11, byte_loop
|
||||
|
||||
chunk16_loop:
|
||||
BEQ R0, R10, byte_loop // no whole chunks left: finish byte-wise
|
||||
MOVV (R3), R6
|
||||
MOVV (R4), R7
|
||||
BNE R6, R7, byte_loop // first 8 bytes differ: let byte_loop find the byte
|
||||
MOVV 8(R3), R13
|
||||
MOVV 8(R4), R14
|
||||
ADDV $16, R3
|
||||
ADDV $16, R4
|
||||
SUBVU $1, R10
|
||||
BEQ R13, R14, chunk16_loop
// Second 8 bytes differ: step back over them so byte_loop re-examines
// them one byte at a time.
|
||||
SUBV $8, R3
|
||||
SUBV $8, R4
|
||||
|
||||
byte_loop:
|
||||
BEQ R3, R8, samebytes // reached the end: compare lengths
|
||||
MOVBU (R3), R6
|
||||
ADDVU $1, R3
|
||||
MOVBU (R4), R7
|
||||
ADDVU $1, R4
|
||||
BEQ R6, R7, byte_loop
|
||||
|
||||
byte_cmp:
// R8 is no longer needed as the end pointer and is reused for the result.
|
||||
SGTU R6, R7, R8 // R8 = 1 if (R6 > R7)
|
||||
BNE R0, R8, ret // a's byte is larger: result is 1
|
||||
MOVV $-1, R8 // otherwise a's byte is smaller: result is -1
|
||||
JMP ret
|
||||
|
||||
samebytes:
// R8 = (len(a) > len(b)) - (len(b) > len(a)), i.e. 1, 0 or -1.
|
||||
SGTU R1, R2, R6
|
||||
SGTU R2, R1, R7
|
||||
SUBV R7, R6, R8
|
||||
|
||||
ret:
|
||||
MOVV R8, (R9) // store the result
|
||||
RET
|
||||
72
src/internal/bytealg/compare_mipsx.s
Normal file
72
src/internal/bytealg/compare_mipsx.s
Normal file
@@ -0,0 +1,72 @@
|
||||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build mips || mipsle
|
||||
|
||||
#include "go_asm.h"
|
||||
#include "textflag.h"
|
||||
|
||||
// Compare(a, b []byte) int. It compares the first min(len(a), len(b)) bytes
// one at a time; if they are all equal (or both slices start at the same
// address) the result is decided by the lengths. The -1/0/1 result is
// stored to ret+24(FP).
TEXT ·Compare(SB),NOSPLIT,$0-28
|
||||
MOVW a_base+0(FP), R3 // R3 = start of a
|
||||
MOVW b_base+12(FP), R4 // R4 = start of b
|
||||
MOVW a_len+4(FP), R1 // R1 = length of a
|
||||
MOVW b_len+16(FP), R2 // R2 = length of b
|
||||
BEQ R3, R4, samebytes // same start address: only the lengths can differ
|
||||
SGTU R1, R2, R7 // R7 = 1 if len(a) > len(b) (unsigned)
|
||||
MOVW R1, R8
|
||||
CMOVN R7, R2, R8 // R8 is min(R1, R2)
|
||||
|
||||
ADDU R3, R8 // R3 is current byte in a, R8 is one past the last byte in a to compare
|
||||
loop:
|
||||
BEQ R3, R8, samebytes // reached the end: compare lengths
|
||||
|
||||
MOVBU (R3), R6
|
||||
ADDU $1, R3
|
||||
MOVBU (R4), R7
|
||||
ADDU $1, R4
|
||||
BEQ R6, R7 , loop
|
||||
|
||||
// bytes differed; R8 is reused for the result
SGTU R6, R7, R8 // R8 = 1 if a's byte > b's byte
|
||||
MOVW $-1, R6
|
||||
CMOVZ R8, R6, R8 // if R8 is 0, a's byte is smaller: R8 = -1
|
||||
JMP cmp_ret
|
||||
samebytes:
// R8 = (len(a) > len(b)) - (len(b) > len(a)), i.e. 1, 0 or -1.
|
||||
SGTU R1, R2, R6
|
||||
SGTU R2, R1, R7
|
||||
SUBU R7, R6, R8
|
||||
cmp_ret:
|
||||
MOVW R8, ret+24(FP)
|
||||
RET
|
||||
|
||||
// cmpstring(a, b string) int. It compares the first min(len(a), len(b))
// bytes one at a time; if they are all equal (or both strings start at the
// same address) the result is decided by the lengths. The -1/0/1 result is
// stored to ret+16(FP).
TEXT runtime·cmpstring(SB),NOSPLIT,$0-20
|
||||
MOVW a_base+0(FP), R3 // R3 = start of a
|
||||
MOVW a_len+4(FP), R1 // R1 = length of a
|
||||
MOVW b_base+8(FP), R4 // R4 = start of b
|
||||
MOVW b_len+12(FP), R2 // R2 = length of b
|
||||
BEQ R3, R4, samebytes // same start address: only the lengths can differ
|
||||
SGTU R1, R2, R7 // R7 = 1 if len(a) > len(b) (unsigned)
|
||||
MOVW R1, R8
|
||||
CMOVN R7, R2, R8 // R8 is min(R1, R2)
|
||||
|
||||
ADDU R3, R8 // R3 is current byte in a, R8 is one past the last byte in a to compare
|
||||
loop:
|
||||
BEQ R3, R8, samebytes // all compared bytes were the same; compare lengths
|
||||
|
||||
MOVBU (R3), R6
|
||||
ADDU $1, R3
|
||||
MOVBU (R4), R7
|
||||
ADDU $1, R4
|
||||
BEQ R6, R7 , loop
|
||||
// bytes differed
// R8 is reused for the result from here on.
|
||||
SGTU R6, R7, R8 // R8 = 1 if a's byte > b's byte
|
||||
MOVW $-1, R6
|
||||
CMOVZ R8, R6, R8 // if R8 is 0, a's byte is smaller: R8 = -1
|
||||
JMP cmp_ret
|
||||
samebytes:
// R8 = (len(a) > len(b)) - (len(b) > len(a)), i.e. 1, 0 or -1.
|
||||
SGTU R1, R2, R6
|
||||
SGTU R2, R1, R7
|
||||
SUBU R7, R6, R8
|
||||
cmp_ret:
|
||||
MOVW R8, ret+16(FP)
|
||||
RET
|
||||
23
src/internal/bytealg/compare_native.go
Normal file
23
src/internal/bytealg/compare_native.go
Normal file
@@ -0,0 +1,23 @@
|
||||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build 386 || amd64 || s390x || arm || arm64 || loong64 || ppc64 || ppc64le || mips || mipsle || wasm || mips64 || mips64le || riscv64
|
||||
|
||||
package bytealg
|
||||
|
||||
import _ "unsafe" // For go:linkname
|
||||
|
||||
// Compare compares the byte slices a and b. It has no Go body here; the
// implementation is supplied per architecture in assembly and produces
// -1, 0, or +1.
//
//go:noescape
|
||||
func Compare(a, b []byte) int
|
||||
|
||||
// CompareString compares the strings a and b by calling
// abigen_runtime_cmpstring, which is linknamed to runtime.cmpstring.
func CompareString(a, b string) int {
|
||||
return abigen_runtime_cmpstring(a, b)
|
||||
}
|
||||
|
||||
// The declaration below generates ABI wrappers for functions
|
||||
// implemented in assembly in this package but declared in another
|
||||
// package.
|
||||
|
||||
//go:linkname abigen_runtime_cmpstring runtime.cmpstring
|
||||
func abigen_runtime_cmpstring(a, b string) int
|
||||
342
src/internal/bytealg/compare_ppc64x.s
Normal file
342
src/internal/bytealg/compare_ppc64x.s
Normal file
@@ -0,0 +1,342 @@
|
||||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build ppc64 || ppc64le
|
||||
|
||||
#include "go_asm.h"
|
||||
#include "textflag.h"
|
||||
|
||||
// Helper names for x-form loads in BE ordering.
|
||||
#ifdef GOARCH_ppc64le
|
||||
#define _LDBEX MOVDBR
|
||||
#define _LWBEX MOVWBR
|
||||
#define _LHBEX MOVHBR
|
||||
#else
|
||||
#define _LDBEX MOVD
|
||||
#define _LWBEX MOVW
|
||||
#define _LHBEX MOVH
|
||||
#endif
|
||||
|
||||
#ifdef GOPPC64_power9
|
||||
#define SETB_CR0(rout) SETB CR0, rout
|
||||
#define SETB_CR1(rout) SETB CR1, rout
|
||||
#define SETB_INIT()
|
||||
#define SETB_CR0_NE(rout) SETB_CR0(rout)
|
||||
#else
|
||||
// A helper macro to emulate SETB on P8. This assumes
|
||||
// -1 is in R20, and 1 is in R21. crxlt and crxeq must
|
||||
// also be the same CR field.
|
||||
#define _SETB(crxlt, crxeq, rout) \
|
||||
ISEL crxeq,R0,R21,rout \
|
||||
ISEL crxlt,R20,rout,rout
|
||||
|
||||
// A special case when it is known the comparison
|
||||
// will always be not equal. The result must be -1 or 1.
|
||||
#define SETB_CR0_NE(rout) \
|
||||
ISEL CR0LT,R20,R21,rout
|
||||
|
||||
#define SETB_CR0(rout) _SETB(CR0LT, CR0EQ, rout)
|
||||
#define SETB_CR1(rout) _SETB(CR1LT, CR1EQ, rout)
|
||||
#define SETB_INIT() \
|
||||
MOVD $-1,R20 \
|
||||
MOVD $1,R21
|
||||
#endif
|
||||
|
||||
// Compare(a, b []byte) int entry point (arguments arrive in registers).
// It precomputes the length-based result in R3 and the number of bytes to
// compare in R9, returns immediately if a and b start at the same address,
// and otherwise branches to cmpbody.
TEXT ·Compare<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-56
|
||||
// incoming:
|
||||
// R3 a addr
|
||||
// R4 a len
|
||||
// R6 b addr
|
||||
// R7 b len
|
||||
//
|
||||
// on entry to cmpbody:
|
||||
// R3 return value if all compared bytes are equal (-1/0/1 from comparing len(a) with len(b))
|
||||
// R5 a addr
|
||||
// R6 b addr
|
||||
// R9 min(len(a),len(b))
|
||||
// Set up whatever constants the SETB_* macros need (nothing on power9).
SETB_INIT()
|
||||
MOVD R3,R5 // a addr -> R5, freeing R3 for the result
|
||||
CMP R4,R7,CR0 // CR0 = len(a) vs len(b)
|
||||
CMP R3,R6,CR7 // CR7 = a addr vs b addr
|
||||
ISEL CR0LT,R4,R7,R9 // R9 = len(a) if len(a) < len(b), else len(b)
|
||||
// R3 = -1/0/1 according to the length comparison in CR0.
SETB_CR0(R3)
|
||||
BC $12,30,LR // beqlr cr7
|
||||
BR cmpbody<>(SB)
|
||||
|
||||
// cmpstring(a, b string) int entry point (arguments arrive in registers).
// It precomputes the length-based result in R3 and the number of bytes to
// compare in R9, moves the addresses into R5/R6, returns immediately if a
// and b start at the same address, and otherwise branches to cmpbody.
TEXT runtime·cmpstring<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-40
|
||||
// incoming:
|
||||
// R3 a addr -> R5
|
||||
// R4 a len
|
||||
// R5 b addr -> R6
|
||||
// R6 b len (then overwritten with b addr)
|
||||
//
|
||||
// on entry to cmpbody:
|
||||
// R3 compare value if compared length is same.
|
||||
// R5 a addr
|
||||
// R6 b addr
|
||||
// R9 min(len(a),len(b))
|
||||
// Set up whatever constants the SETB_* macros need (nothing on power9).
SETB_INIT()
|
||||
CMP R4,R6,CR0 // CR0 = len(a) vs len(b)
|
||||
CMP R3,R5,CR7 // CR7 = a addr vs b addr
|
||||
ISEL CR0LT,R4,R6,R9 // R9 = len(a) if len(a) < len(b), else len(b)
|
||||
MOVD R5,R6 // b addr -> R6 (b len is no longer needed; its ordering is held in CR0)
|
||||
MOVD R3,R5 // a addr -> R5, freeing R3 for the result
|
||||
// R3 = -1/0/1 according to the length comparison in CR0.
SETB_CR0(R3)
|
||||
BC $12,30,LR // beqlr cr7
|
||||
BR cmpbody<>(SB)
|
||||
|
||||
#ifdef GOARCH_ppc64le
|
||||
DATA byteswap<>+0(SB)/8, $0x0706050403020100
|
||||
DATA byteswap<>+8(SB)/8, $0x0f0e0d0c0b0a0908
|
||||
GLOBL byteswap<>+0(SB), RODATA, $16
|
||||
#define SWAP V21
|
||||
#endif
|
||||
|
||||
TEXT cmpbody<>(SB),NOSPLIT|NOFRAME,$0-0
|
||||
start:
|
||||
CMP R9,$16,CR0
|
||||
CMP R9,$32,CR1
|
||||
CMP R9,$64,CR2
|
||||
MOVD $16,R10
|
||||
BLT cmp8
|
||||
BLT CR1,cmp16
|
||||
BLT CR2,cmp32
|
||||
|
||||
cmp64: // >= 64B
|
||||
DCBT (R5) // optimize for size>=64
|
||||
DCBT (R6) // cache hint
|
||||
|
||||
SRD $6,R9,R14 // There is at least one iteration.
|
||||
MOVD R14,CTR
|
||||
ANDCC $63,R9,R9
|
||||
CMP R9,$16,CR1 // Do setup for tail check early on.
|
||||
CMP R9,$32,CR2
|
||||
CMP R9,$48,CR3
|
||||
ADD $-16,R9,R9
|
||||
|
||||
MOVD $32,R11 // set offsets to load into vector
|
||||
MOVD $48,R12 // set offsets to load into vector
|
||||
|
||||
PCALIGN $16
|
||||
cmp64_loop:
|
||||
LXVD2X (R5)(R0),V3 // load bytes of A at offset 0 into vector
|
||||
LXVD2X (R6)(R0),V4 // load bytes of B at offset 0 into vector
|
||||
VCMPEQUDCC V3,V4,V1
|
||||
BGE CR6,different // jump out if its different
|
||||
|
||||
LXVD2X (R5)(R10),V3 // load bytes of A at offset 16 into vector
|
||||
LXVD2X (R6)(R10),V4 // load bytes of B at offset 16 into vector
|
||||
VCMPEQUDCC V3,V4,V1
|
||||
BGE CR6,different
|
||||
|
||||
LXVD2X (R5)(R11),V3 // load bytes of A at offset 32 into vector
|
||||
LXVD2X (R6)(R11),V4 // load bytes of B at offset 32 into vector
|
||||
VCMPEQUDCC V3,V4,V1
|
||||
BGE CR6,different
|
||||
|
||||
LXVD2X (R5)(R12),V3 // load bytes of A at offset 48 into vector
|
||||
LXVD2X (R6)(R12),V4 // load bytes of B at offset 48 into vector
|
||||
VCMPEQUDCC V3,V4,V1
|
||||
BGE CR6,different
|
||||
|
||||
ADD $64,R5,R5 // increment to next 64 bytes of A
|
||||
ADD $64,R6,R6 // increment to next 64 bytes of B
|
||||
BDNZ cmp64_loop
|
||||
BC $12,2,LR // beqlr
|
||||
|
||||
// Finish out tail with minimal overlapped checking.
|
||||
// Note, 0 tail is handled by beqlr above.
|
||||
BLE CR1,cmp64_tail_gt0
|
||||
BLE CR2,cmp64_tail_gt16
|
||||
BLE CR3,cmp64_tail_gt32
|
||||
|
||||
cmp64_tail_gt48: // 49 - 63 B
|
||||
LXVD2X (R0)(R5),V3
|
||||
LXVD2X (R0)(R6),V4
|
||||
VCMPEQUDCC V3,V4,V1
|
||||
BGE CR6,different
|
||||
|
||||
LXVD2X (R5)(R10),V3
|
||||
LXVD2X (R6)(R10),V4
|
||||
VCMPEQUDCC V3,V4,V1
|
||||
BGE CR6,different
|
||||
|
||||
LXVD2X (R5)(R11),V3
|
||||
LXVD2X (R6)(R11),V4
|
||||
VCMPEQUDCC V3,V4,V1
|
||||
BGE CR6,different
|
||||
|
||||
BR cmp64_tail_gt0
|
||||
|
||||
PCALIGN $16
|
||||
cmp64_tail_gt32: // 33 - 48B
|
||||
LXVD2X (R0)(R5),V3
|
||||
LXVD2X (R0)(R6),V4
|
||||
VCMPEQUDCC V3,V4,V1
|
||||
BGE CR6,different
|
||||
|
||||
LXVD2X (R5)(R10),V3
|
||||
LXVD2X (R6)(R10),V4
|
||||
VCMPEQUDCC V3,V4,V1
|
||||
BGE CR6,different
|
||||
|
||||
BR cmp64_tail_gt0
|
||||
|
||||
PCALIGN $16
|
||||
cmp64_tail_gt16: // 17 - 32B
|
||||
LXVD2X (R0)(R5),V3
|
||||
LXVD2X (R0)(R6),V4
|
||||
VCMPEQUDCC V3,V4,V1
|
||||
BGE CR6,different
|
||||
|
||||
BR cmp64_tail_gt0
|
||||
|
||||
PCALIGN $16
|
||||
cmp64_tail_gt0: // 1 - 16B
|
||||
LXVD2X (R5)(R9),V3
|
||||
LXVD2X (R6)(R9),V4
|
||||
VCMPEQUDCC V3,V4,V1
|
||||
BGE CR6,different
|
||||
|
||||
RET
|
||||
|
||||
PCALIGN $16
|
||||
cmp32: // 32 - 63B
|
||||
ANDCC $31,R9,R9
|
||||
|
||||
LXVD2X (R0)(R5),V3
|
||||
LXVD2X (R0)(R6),V4
|
||||
VCMPEQUDCC V3,V4,V1
|
||||
BGE CR6,different
|
||||
|
||||
LXVD2X (R10)(R5),V3
|
||||
LXVD2X (R10)(R6),V4
|
||||
VCMPEQUDCC V3,V4,V1
|
||||
BGE CR6,different
|
||||
|
||||
BC $12,2,LR // beqlr
|
||||
ADD R9,R10,R10
|
||||
|
||||
LXVD2X (R9)(R5),V3
|
||||
LXVD2X (R9)(R6),V4
|
||||
VCMPEQUDCC V3,V4,V1
|
||||
BGE CR6,different
|
||||
|
||||
LXVD2X (R10)(R5),V3
|
||||
LXVD2X (R10)(R6),V4
|
||||
VCMPEQUDCC V3,V4,V1
|
||||
BGE CR6,different
|
||||
RET
|
||||
|
||||
PCALIGN $16
|
||||
cmp16: // 16 - 31B
|
||||
ANDCC $15,R9,R9
|
||||
LXVD2X (R0)(R5),V3
|
||||
LXVD2X (R0)(R6),V4
|
||||
VCMPEQUDCC V3,V4,V1
|
||||
BGE CR6,different
|
||||
BC $12,2,LR // beqlr
|
||||
|
||||
LXVD2X (R9)(R5),V3
|
||||
LXVD2X (R9)(R6),V4
|
||||
VCMPEQUDCC V3,V4,V1
|
||||
BGE CR6,different
|
||||
RET
|
||||
|
||||
PCALIGN $16
|
||||
different:
|
||||
#ifdef GOARCH_ppc64le
|
||||
MOVD $byteswap<>+00(SB),R16
|
||||
LXVD2X (R16)(R0),SWAP // Set up swap string
|
||||
|
||||
VPERM V3,V3,SWAP,V3
|
||||
VPERM V4,V4,SWAP,V4
|
||||
#endif
|
||||
|
||||
MFVSRD VS35,R16 // move upper doublewords of A and B into GPR for comparison
|
||||
MFVSRD VS36,R10
|
||||
|
||||
CMPU R16,R10
|
||||
BEQ lower
|
||||
SETB_CR0_NE(R3)
|
||||
RET
|
||||
|
||||
PCALIGN $16
|
||||
lower:
|
||||
VSLDOI $8,V3,V3,V3 // move lower doublewords of A and B into GPR for comparison
|
||||
MFVSRD VS35,R16
|
||||
VSLDOI $8,V4,V4,V4
|
||||
MFVSRD VS36,R10
|
||||
|
||||
CMPU R16,R10
|
||||
SETB_CR0_NE(R3)
|
||||
RET
|
||||
|
||||
PCALIGN $16
|
||||
cmp8: // 8 - 15B (0 - 15B if GOPPC64_power10)
|
||||
#ifdef GOPPC64_power10
|
||||
SLD $56,R9,R9
|
||||
LXVLL R5,R9,V3 // Load bytes starting from MSB to LSB, unused are zero filled.
|
||||
LXVLL R6,R9,V4
|
||||
VCMPUQ V3,V4,CR0 // Compare as a 128b integer.
|
||||
SETB_CR0(R6)
|
||||
ISEL CR0EQ,R3,R6,R3 // If equal, length determines the return value.
|
||||
RET
|
||||
#else
|
||||
CMP R9,$8
|
||||
BLT cmp4
|
||||
ANDCC $7,R9,R9
|
||||
_LDBEX (R0)(R5),R10
|
||||
_LDBEX (R0)(R6),R11
|
||||
_LDBEX (R9)(R5),R12
|
||||
_LDBEX (R9)(R6),R14
|
||||
CMPU R10,R11,CR0
|
||||
SETB_CR0(R5)
|
||||
CMPU R12,R14,CR1
|
||||
SETB_CR1(R6)
|
||||
CRAND CR0EQ,CR1EQ,CR1EQ // If both equal, length determines return value.
|
||||
ISEL CR0EQ,R6,R5,R4
|
||||
ISEL CR1EQ,R3,R4,R3
|
||||
RET
|
||||
|
||||
PCALIGN $16
|
||||
cmp4: // 4 - 7B
|
||||
CMP R9,$4
|
||||
BLT cmp2
|
||||
ANDCC $3,R9,R9
|
||||
_LWBEX (R0)(R5),R10
|
||||
_LWBEX (R0)(R6),R11
|
||||
_LWBEX (R9)(R5),R12
|
||||
_LWBEX (R9)(R6),R14
|
||||
RLDIMI $32,R10,$0,R12
|
||||
RLDIMI $32,R11,$0,R14
|
||||
CMPU R12,R14
|
||||
BR cmp0
|
||||
|
||||
PCALIGN $16
|
||||
cmp2: // 2 - 3B
|
||||
CMP R9,$2
|
||||
BLT cmp1
|
||||
ANDCC $1,R9,R9
|
||||
_LHBEX (R0)(R5),R10
|
||||
_LHBEX (R0)(R6),R11
|
||||
_LHBEX (R9)(R5),R12
|
||||
_LHBEX (R9)(R6),R14
|
||||
RLDIMI $32,R10,$0,R12
|
||||
RLDIMI $32,R11,$0,R14
|
||||
CMPU R12,R14
|
||||
BR cmp0
|
||||
|
||||
PCALIGN $16
|
||||
cmp1:
|
||||
CMP R9,$0
|
||||
BEQ cmp0
|
||||
MOVBZ (R5),R10
|
||||
MOVBZ (R6),R11
|
||||
CMPU R10,R11
|
||||
cmp0:
|
||||
SETB_CR0(R6)
|
||||
ISEL CR0EQ,R3,R6,R3
|
||||
RET
|
||||
#endif
|
||||
222
src/internal/bytealg/compare_riscv64.s
Normal file
222
src/internal/bytealg/compare_riscv64.s
Normal file
@@ -0,0 +1,222 @@
|
||||
// Copyright 2022 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include "go_asm.h"
|
||||
#include "textflag.h"
|
||||
|
||||
// Compare(a, b []byte) int entry point (arguments arrive in registers).
// It moves b's base and length down into X12/X13, the registers compare<>
// expects, and jumps to compare<>.
TEXT ·Compare<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-56
|
||||
// X10 = a_base
|
||||
// X11 = a_len
|
||||
// X12 = a_cap (unused)
|
||||
// X13 = b_base (want in X12)
|
||||
// X14 = b_len (want in X13)
|
||||
// X15 = b_cap (unused)
|
||||
MOV X13, X12 // b_base -> X12 (overwrites the unused a_cap); must precede the next move, which overwrites X13
|
||||
MOV X14, X13 // b_len -> X13
|
||||
JMP compare<>(SB)
|
||||
|
||||
// cmpstring(a, b string) int entry point (arguments arrive in registers).
// The incoming registers already match what compare<> expects, so this
// only jumps to compare<>.
TEXT runtime·cmpstring<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-40
|
||||
// X10 = a_base
|
||||
// X11 = a_len
|
||||
// X12 = b_base
|
||||
// X13 = b_len
|
||||
JMP compare<>(SB)
|
||||
|
||||
// On entry:
|
||||
// X10 points to start of a
|
||||
// X11 length of a
|
||||
// X12 points to start of b
|
||||
// X13 length of b
|
||||
// for non-regabi X14 points to the address to store the return value (-1/0/1)
|
||||
// for regabi the return value in X10
|
||||
TEXT compare<>(SB),NOSPLIT|NOFRAME,$0
|
||||
BEQ X10, X12, cmp_len
|
||||
|
||||
MOV X11, X5
|
||||
BGE X13, X5, use_a_len // X5 = min(len(a), len(b))
|
||||
MOV X13, X5
|
||||
use_a_len:
|
||||
BEQZ X5, cmp_len
|
||||
|
||||
MOV $32, X6
|
||||
BLT X5, X6, check8_unaligned
|
||||
|
||||
// Check alignment - if alignment differs we have to do one byte at a time.
|
||||
AND $7, X10, X7
|
||||
AND $7, X12, X8
|
||||
BNE X7, X8, check8_unaligned
|
||||
BEQZ X7, compare32
|
||||
|
||||
// Check one byte at a time until we reach 8 byte alignment.
|
||||
SUB X7, X0, X7
|
||||
ADD $8, X7, X7
|
||||
SUB X7, X5, X5
|
||||
align:
|
||||
SUB $1, X7
|
||||
MOVBU 0(X10), X8
|
||||
MOVBU 0(X12), X9
|
||||
BNE X8, X9, cmp
|
||||
ADD $1, X10
|
||||
ADD $1, X12
|
||||
BNEZ X7, align
|
||||
|
||||
check32:
|
||||
// X6 contains $32
|
||||
BLT X5, X6, compare16
|
||||
compare32:
|
||||
MOV 0(X10), X15
|
||||
MOV 0(X12), X16
|
||||
MOV 8(X10), X17
|
||||
MOV 8(X12), X18
|
||||
BNE X15, X16, cmp8a
|
||||
BNE X17, X18, cmp8b
|
||||
MOV 16(X10), X15
|
||||
MOV 16(X12), X16
|
||||
MOV 24(X10), X17
|
||||
MOV 24(X12), X18
|
||||
BNE X15, X16, cmp8a
|
||||
BNE X17, X18, cmp8b
|
||||
ADD $32, X10
|
||||
ADD $32, X12
|
||||
SUB $32, X5
|
||||
BGE X5, X6, compare32
|
||||
BEQZ X5, cmp_len
|
||||
|
||||
check16:
|
||||
MOV $16, X6
|
||||
BLT X5, X6, check8_unaligned
|
||||
compare16:
|
||||
MOV 0(X10), X15
|
||||
MOV 0(X12), X16
|
||||
MOV 8(X10), X17
|
||||
MOV 8(X12), X18
|
||||
BNE X15, X16, cmp8a
|
||||
BNE X17, X18, cmp8b
|
||||
ADD $16, X10
|
||||
ADD $16, X12
|
||||
SUB $16, X5
|
||||
BEQZ X5, cmp_len
|
||||
|
||||
check8_unaligned:
|
||||
MOV $8, X6
|
||||
BLT X5, X6, check4_unaligned
|
||||
compare8_unaligned:
|
||||
MOVBU 0(X10), X8
|
||||
MOVBU 1(X10), X15
|
||||
MOVBU 2(X10), X17
|
||||
MOVBU 3(X10), X19
|
||||
MOVBU 4(X10), X21
|
||||
MOVBU 5(X10), X23
|
||||
MOVBU 6(X10), X25
|
||||
MOVBU 7(X10), X29
|
||||
MOVBU 0(X12), X9
|
||||
MOVBU 1(X12), X16
|
||||
MOVBU 2(X12), X18
|
||||
MOVBU 3(X12), X20
|
||||
MOVBU 4(X12), X22
|
||||
MOVBU 5(X12), X24
|
||||
MOVBU 6(X12), X28
|
||||
MOVBU 7(X12), X30
|
||||
BNE X8, X9, cmp1a
|
||||
BNE X15, X16, cmp1b
|
||||
BNE X17, X18, cmp1c
|
||||
BNE X19, X20, cmp1d
|
||||
BNE X21, X22, cmp1e
|
||||
BNE X23, X24, cmp1f
|
||||
BNE X25, X28, cmp1g
|
||||
BNE X29, X30, cmp1h
|
||||
ADD $8, X10
|
||||
ADD $8, X12
|
||||
SUB $8, X5
|
||||
BGE X5, X6, compare8_unaligned
|
||||
BEQZ X5, cmp_len
|
||||
|
||||
check4_unaligned:
|
||||
MOV $4, X6
|
||||
BLT X5, X6, compare1
|
||||
compare4_unaligned:
|
||||
MOVBU 0(X10), X8
|
||||
MOVBU 1(X10), X15
|
||||
MOVBU 2(X10), X17
|
||||
MOVBU 3(X10), X19
|
||||
MOVBU 0(X12), X9
|
||||
MOVBU 1(X12), X16
|
||||
MOVBU 2(X12), X18
|
||||
MOVBU 3(X12), X20
|
||||
BNE X8, X9, cmp1a
|
||||
BNE X15, X16, cmp1b
|
||||
BNE X17, X18, cmp1c
|
||||
BNE X19, X20, cmp1d
|
||||
ADD $4, X10
|
||||
ADD $4, X12
|
||||
SUB $4, X5
|
||||
BGE X5, X6, compare4_unaligned
|
||||
|
||||
compare1:
|
||||
BEQZ X5, cmp_len
|
||||
MOVBU 0(X10), X8
|
||||
MOVBU 0(X12), X9
|
||||
BNE X8, X9, cmp
|
||||
ADD $1, X10
|
||||
ADD $1, X12
|
||||
SUB $1, X5
|
||||
JMP compare1
|
||||
|
||||
// Compare 8 bytes of memory in X15/X16 that are known to differ.
|
||||
cmp8a:
|
||||
MOV X15, X17
|
||||
MOV X16, X18
|
||||
|
||||
// Compare 8 bytes of memory in X17/X18 that are known to differ.
|
||||
cmp8b:
|
||||
MOV $0xff, X19
|
||||
cmp8_loop:
|
||||
AND X17, X19, X8
|
||||
AND X18, X19, X9
|
||||
BNE X8, X9, cmp
|
||||
SLLI $8, X19
|
||||
JMP cmp8_loop
|
||||
|
||||
cmp1a:
|
||||
SLTU X9, X8, X5
|
||||
SLTU X8, X9, X6
|
||||
JMP cmp_ret
|
||||
cmp1b:
|
||||
SLTU X16, X15, X5
|
||||
SLTU X15, X16, X6
|
||||
JMP cmp_ret
|
||||
cmp1c:
|
||||
SLTU X18, X17, X5
|
||||
SLTU X17, X18, X6
|
||||
JMP cmp_ret
|
||||
cmp1d:
|
||||
SLTU X20, X19, X5
|
||||
SLTU X19, X20, X6
|
||||
JMP cmp_ret
|
||||
cmp1e:
|
||||
SLTU X22, X21, X5
|
||||
SLTU X21, X22, X6
|
||||
JMP cmp_ret
|
||||
cmp1f:
|
||||
SLTU X24, X23, X5
|
||||
SLTU X23, X24, X6
|
||||
JMP cmp_ret
|
||||
cmp1g:
|
||||
SLTU X28, X25, X5
|
||||
SLTU X25, X28, X6
|
||||
JMP cmp_ret
|
||||
cmp1h:
|
||||
SLTU X30, X29, X5
|
||||
SLTU X29, X30, X6
|
||||
JMP cmp_ret
|
||||
|
||||
cmp_len:
|
||||
MOV X11, X8
|
||||
MOV X13, X9
|
||||
cmp:
|
||||
SLTU X9, X8, X5
|
||||
SLTU X8, X9, X6
|
||||
cmp_ret:
|
||||
SUB X5, X6, X10
|
||||
RET
|
||||
69
src/internal/bytealg/compare_s390x.s
Normal file
69
src/internal/bytealg/compare_s390x.s
Normal file
@@ -0,0 +1,69 @@
|
||||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include "go_asm.h"
|
||||
#include "textflag.h"
|
||||
|
||||
// Compare(a, b []byte) int entry point. It loads both slice headers from
// the argument frame into the registers cmpbody expects, points R7 at the
// result slot, and branches to cmpbody, which stores the result through R7.
TEXT ·Compare(SB),NOSPLIT|NOFRAME,$0-56
|
||||
MOVD a_base+0(FP), R3 // R3 = a
|
||||
MOVD a_len+8(FP), R4 // R4 = alen
|
||||
MOVD b_base+24(FP), R5 // R5 = b
|
||||
MOVD b_len+32(FP), R6 // R6 = blen
|
||||
LA ret+48(FP), R7 // R7 = address of the result slot
|
||||
BR cmpbody<>(SB)
|
||||
|
||||
// cmpstring(a, b string) int entry point. It loads both string headers from
// the argument frame into the registers cmpbody expects, points R7 at the
// result slot, and branches to cmpbody, which stores the result through R7.
TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-40
|
||||
MOVD a_base+0(FP), R3 // R3 = a
|
||||
MOVD a_len+8(FP), R4 // R4 = alen
|
||||
MOVD b_base+16(FP), R5 // R5 = b
|
||||
MOVD b_len+24(FP), R6 // R6 = blen
|
||||
LA ret+32(FP), R7 // R7 = address of the result slot
|
||||
BR cmpbody<>(SB)
|
||||
|
||||
// cmpbody is the shared comparison body for Compare and cmpstring.
// It compares the first min(alen, blen) bytes with CLC, 256 bytes at a
// time and then a final partial block; if all compared bytes are equal
// the result is decided by the lengths.
//
// input:
|
||||
// R3 = a
|
||||
// R4 = alen
|
||||
// R5 = b
|
||||
// R6 = blen
|
||||
// R7 = address of output word (stores -1/0/1 here)
|
||||
TEXT cmpbody<>(SB),NOSPLIT|NOFRAME,$0-0
|
||||
CMPBEQ R3, R5, cmplengths // same start address: only the lengths can differ
|
||||
MOVD R4, R8 // R8 = alen
|
||||
CMPBLE R4, R6, amin // alen <= blen: keep alen
|
||||
MOVD R6, R8 // otherwise R8 = blen, so R8 = min(alen, blen)
|
||||
amin:
|
||||
CMPBEQ R8, $0, cmplengths // nothing to compare: decide on the lengths
|
||||
CMP R8, $256
|
||||
BLE tail // at most 256 bytes: a single variable-length compare
|
||||
loop:
|
||||
CLC $256, 0(R3), 0(R5) // compare the next 256 bytes of a and b
|
||||
BGT gt
|
||||
BLT lt
|
||||
SUB $256, R8 // 256 fewer bytes remain
|
||||
MOVD $256(R3), R3 // advance a by 256
|
||||
MOVD $256(R5), R5 // advance b by 256
|
||||
CMP R8, $256
|
||||
BGT loop // more than 256 bytes remain
|
||||
tail:
// 1..256 bytes remain. Execute the CLC in cmpbodyclc with R8 as the
// EXRL operand. NOTE(review): presumably EXRL substitutes R8's low byte
// for the CLC length field, which encodes length-1, hence the SUB $1;
// confirm against the z/Architecture documentation.
|
||||
SUB $1, R8
|
||||
EXRL $cmpbodyclc<>(SB), R8
|
||||
BGT gt
|
||||
BLT lt
|
||||
cmplengths:
|
||||
CMP R4, R6 // compared bytes are equal: order by length
|
||||
BEQ eq
|
||||
BLT lt
|
||||
gt:
|
||||
MOVD $1, 0(R7)
|
||||
RET
|
||||
lt:
|
||||
MOVD $-1, 0(R7)
|
||||
RET
|
||||
eq:
|
||||
MOVD $0, 0(R7)
|
||||
RET
|
||||
|
||||
// cmpbodyclc holds the CLC instruction that cmpbody's tail runs via EXRL,
// comparing memory at 0(R3) with memory at 0(R5).
TEXT cmpbodyclc<>(SB),NOSPLIT|NOFRAME,$0-0
|
||||
CLC $1, 0(R3), 0(R5)
|
||||
RET
|
||||
115
src/internal/bytealg/compare_wasm.s
Normal file
115
src/internal/bytealg/compare_wasm.s
Normal file
@@ -0,0 +1,115 @@
|
||||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include "go_asm.h"
|
||||
#include "textflag.h"
|
||||
|
||||
// Compare(a, b []byte) int entry point. It pushes the base and length of
// both slices, calls cmpbody (params: a, alen, b, blen; returns -1/0/1),
// and stores the returned value to ret+48(FP).
TEXT ·Compare(SB), NOSPLIT, $0-56
|
||||
Get SP // presumably the address operand consumed by the I64Store below; confirm
|
||||
I64Load a_base+0(FP)
|
||||
I64Load a_len+8(FP)
|
||||
I64Load b_base+24(FP)
|
||||
I64Load b_len+32(FP)
|
||||
Call cmpbody<>(SB)
|
||||
I64Store ret+48(FP)
|
||||
RET
|
||||
|
||||
// cmpstring(a, b string) int entry point. It pushes the base and length of
// both strings, calls cmpbody (params: a, alen, b, blen; returns -1/0/1),
// and stores the returned value to ret+32(FP).
TEXT runtime·cmpstring(SB), NOSPLIT, $0-40
|
||||
Get SP // presumably the address operand consumed by the I64Store below; confirm
|
||||
I64Load a_base+0(FP)
|
||||
I64Load a_len+8(FP)
|
||||
I64Load b_base+16(FP)
|
||||
I64Load b_len+24(FP)
|
||||
Call cmpbody<>(SB)
|
||||
I64Store ret+32(FP)
|
||||
RET
|
||||
|
||||
// params: a, alen, b, blen
|
||||
// ret: -1/0/1
|
||||
TEXT cmpbody<>(SB), NOSPLIT, $0-0
|
||||
// len = min(alen, blen)
|
||||
Get R1
|
||||
Get R3
|
||||
Get R1
|
||||
Get R3
|
||||
I64LtU
|
||||
Select
|
||||
Set R4
|
||||
|
||||
Get R0
|
||||
I32WrapI64
|
||||
Get R2
|
||||
I32WrapI64
|
||||
Get R4
|
||||
I32WrapI64
|
||||
Call memcmp<>(SB)
|
||||
I64ExtendI32S
|
||||
Tee R5
|
||||
|
||||
I64Eqz
|
||||
If
|
||||
// check length
|
||||
Get R1
|
||||
Get R3
|
||||
I64Sub
|
||||
Set R5
|
||||
End
|
||||
|
||||
I64Const $0
|
||||
I64Const $-1
|
||||
I64Const $1
|
||||
Get R5
|
||||
I64Const $0
|
||||
I64LtS
|
||||
Select
|
||||
Get R5
|
||||
I64Eqz
|
||||
Select
|
||||
Return
|
||||
|
||||
// compiled with emscripten
|
||||
// params: a, b, len
|
||||
// ret: <0/0/>0
|
||||
TEXT memcmp<>(SB), NOSPLIT, $0-0
|
||||
Get R2
|
||||
If $1
|
||||
Loop
|
||||
Get R0
|
||||
I32Load8S $0
|
||||
Tee R3
|
||||
Get R1
|
||||
I32Load8S $0
|
||||
Tee R4
|
||||
I32Eq
|
||||
If
|
||||
Get R0
|
||||
I32Const $1
|
||||
I32Add
|
||||
Set R0
|
||||
Get R1
|
||||
I32Const $1
|
||||
I32Add
|
||||
Set R1
|
||||
I32Const $0
|
||||
Get R2
|
||||
I32Const $-1
|
||||
I32Add
|
||||
Tee R2
|
||||
I32Eqz
|
||||
BrIf $3
|
||||
Drop
|
||||
Br $1
|
||||
End
|
||||
End
|
||||
Get R3
|
||||
I32Const $255
|
||||
I32And
|
||||
Get R4
|
||||
I32Const $255
|
||||
I32And
|
||||
I32Sub
|
||||
Else
|
||||
I32Const $0
|
||||
End
|
||||
Return
|
||||
229
src/internal/bytealg/count_amd64.s
Normal file
229
src/internal/bytealg/count_amd64.s
Normal file
@@ -0,0 +1,229 @@
|
||||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include "go_asm.h"
|
||||
#include "asm_amd64.h"
|
||||
#include "textflag.h"
|
||||
|
||||
// Count is the amd64 entry point for counting occurrences of byte c in
// slice b. It loads the arguments into the registers countbody expects
// (SI = data, BX = length, AL = byte sought, R8 = address of the result)
// and jumps to countbody.
TEXT ·Count(SB),NOSPLIT,$0-40
|
||||
// Unless the build guarantees POPCNT (hasPOPCNT defined), check the CPU
// feature flag at run time and fall back to countGeneric when it is not set.
#ifndef hasPOPCNT
|
||||
CMPB internal∕cpu·X86+const_offsetX86HasPOPCNT(SB), $1
|
||||
JEQ 2(PC) // POPCNT available: skip the fallback jump
|
||||
JMP ·countGeneric(SB)
|
||||
#endif
|
||||
MOVQ b_base+0(FP), SI
|
||||
MOVQ b_len+8(FP), BX
|
||||
MOVB c+24(FP), AL
|
||||
LEAQ ret+32(FP), R8 // countbody writes the count through R8
|
||||
JMP countbody<>(SB)
|
||||
|
||||
// CountString is the amd64 entry point for counting occurrences of byte c
// in string s. It loads the arguments into the registers countbody expects
// (SI = data, BX = length, AL = byte sought, R8 = address of the result)
// and jumps to countbody.
TEXT ·CountString(SB),NOSPLIT,$0-32
|
||||
// Unless the build guarantees POPCNT (hasPOPCNT defined), check the CPU
// feature flag at run time and fall back to countGenericString when it is
// not set.
#ifndef hasPOPCNT
|
||||
CMPB internal∕cpu·X86+const_offsetX86HasPOPCNT(SB), $1
|
||||
JEQ 2(PC) // POPCNT available: skip the fallback jump
|
||||
JMP ·countGenericString(SB)
|
||||
#endif
|
||||
MOVQ s_base+0(FP), SI
|
||||
MOVQ s_len+8(FP), BX
|
||||
MOVB c+16(FP), AL
|
||||
LEAQ ret+24(FP), R8 // countbody writes the count through R8
|
||||
JMP countbody<>(SB)
|
||||
|
||||
// input:
|
||||
// SI: data
|
||||
// BX: data len
|
||||
// AL: byte sought
|
||||
// R8: address to put result
|
||||
// This function requires the POPCNT instruction.
|
||||
TEXT countbody<>(SB),NOSPLIT,$0
|
||||
// Shuffle X0 around so that each byte contains
|
||||
// the character we're looking for.
|
||||
MOVD AX, X0
|
||||
PUNPCKLBW X0, X0
|
||||
PUNPCKLBW X0, X0
|
||||
PSHUFL $0, X0, X0
|
||||
|
||||
CMPQ BX, $16
|
||||
JLT small
|
||||
|
||||
MOVQ $0, R12 // Accumulator
|
||||
|
||||
MOVQ SI, DI
|
||||
|
||||
CMPQ BX, $64
|
||||
JAE avx2
|
||||
sse:
|
||||
LEAQ -16(SI)(BX*1), AX // AX = address of last 16 bytes
|
||||
JMP sseloopentry
|
||||
|
||||
PCALIGN $16
|
||||
sseloop:
|
||||
// Move the next 16-byte chunk of the data into X1.
|
||||
MOVOU (DI), X1
|
||||
// Compare bytes in X0 to X1.
|
||||
PCMPEQB X0, X1
|
||||
// Take the top bit of each byte in X1 and put the result in DX.
|
||||
PMOVMSKB X1, DX
|
||||
// Count number of matching bytes
|
||||
POPCNTL DX, DX
|
||||
// Accumulate into R12
|
||||
ADDQ DX, R12
|
||||
// Advance to next block.
|
||||
ADDQ $16, DI
|
||||
sseloopentry:
|
||||
CMPQ DI, AX
|
||||
JBE sseloop
|
||||
|
||||
// Get the number of bytes to consider in the last 16 bytes
|
||||
ANDQ $15, BX
|
||||
JZ end
|
||||
|
||||
// Create mask to ignore overlap between previous 16 byte block
|
||||
// and the next.
|
||||
MOVQ $16,CX
|
||||
SUBQ BX, CX
|
||||
MOVQ $0xFFFF, R10
|
||||
SARQ CL, R10
|
||||
SALQ CL, R10
|
||||
|
||||
// Process the last 16-byte chunk. This chunk may overlap with the
|
||||
// chunks we've already searched so we need to mask part of it.
|
||||
MOVOU (AX), X1
|
||||
PCMPEQB X0, X1
|
||||
PMOVMSKB X1, DX
|
||||
// Apply mask
|
||||
ANDQ R10, DX
|
||||
POPCNTL DX, DX
|
||||
ADDQ DX, R12
|
||||
end:
|
||||
MOVQ R12, (R8)
|
||||
RET
|
||||
|
||||
// Handle lengths < 16.
|
||||
small:
|
||||
TESTQ BX, BX
|
||||
JEQ endzero
|
||||
|
||||
// Check if we'll load across a page boundary.
|
||||
LEAQ 16(SI), AX
|
||||
TESTW $0xff0, AX
|
||||
JEQ endofpage
|
||||
|
||||
// We must ignore high bytes as they aren't part of our slice.
|
||||
// Create mask.
|
||||
MOVB BX, CX
|
||||
MOVQ $1, R10
|
||||
SALQ CL, R10
|
||||
SUBQ $1, R10
|
||||
|
||||
// Load data
|
||||
MOVOU (SI), X1
|
||||
// Compare target byte with each byte in data.
|
||||
PCMPEQB X0, X1
|
||||
// Move result bits to integer register.
|
||||
PMOVMSKB X1, DX
|
||||
// Apply mask
|
||||
ANDQ R10, DX
|
||||
POPCNTL DX, DX
|
||||
// Directly return DX, we don't need to accumulate
|
||||
// since we have <16 bytes.
|
||||
MOVQ DX, (R8)
|
||||
RET
|
||||
endzero:
|
||||
MOVQ $0, (R8)
|
||||
RET
|
||||
|
||||
endofpage:
|
||||
// We must ignore low bytes as they aren't part of our slice.
|
||||
MOVQ $16,CX
|
||||
SUBQ BX, CX
|
||||
MOVQ $0xFFFF, R10
|
||||
SARQ CL, R10
|
||||
SALQ CL, R10
|
||||
|
||||
// Load data into the high end of X1.
|
||||
MOVOU -16(SI)(BX*1), X1
|
||||
// Compare target byte with each byte in data.
|
||||
PCMPEQB X0, X1
|
||||
// Move result bits to integer register.
|
||||
PMOVMSKB X1, DX
|
||||
// Apply mask
|
||||
ANDQ R10, DX
|
||||
// Directly return DX, we don't need to accumulate
|
||||
// since we have <16 bytes.
|
||||
POPCNTL DX, DX
|
||||
MOVQ DX, (R8)
|
||||
RET
|
||||
|
||||
avx2:
|
||||
#ifndef hasAVX2
|
||||
CMPB internal∕cpu·X86+const_offsetX86HasAVX2(SB), $1
|
||||
JNE sse
|
||||
#endif
|
||||
MOVD AX, X0
|
||||
LEAQ -64(SI)(BX*1), R11
|
||||
LEAQ (SI)(BX*1), R13
|
||||
VPBROADCASTB X0, Y1
|
||||
PCALIGN $32
|
||||
avx2_loop:
|
||||
VMOVDQU (DI), Y2
|
||||
VMOVDQU 32(DI), Y4
|
||||
VPCMPEQB Y1, Y2, Y3
|
||||
VPCMPEQB Y1, Y4, Y5
|
||||
VPMOVMSKB Y3, DX
|
||||
VPMOVMSKB Y5, CX
|
||||
POPCNTL DX, DX
|
||||
POPCNTL CX, CX
|
||||
ADDQ DX, R12
|
||||
ADDQ CX, R12
|
||||
ADDQ $64, DI
|
||||
CMPQ DI, R11
|
||||
JLE avx2_loop
|
||||
|
||||
// If last block is already processed,
|
||||
// skip to the end.
|
||||
//
|
||||
// This check is NOT an optimization; if the input length is a
|
||||
// multiple of 64, we must not go through the last leg of the
|
||||
// function because the bit shift count passed to SALQ below would
|
||||
// be 64, which is outside of the 0-63 range supported by those
|
||||
// instructions.
|
||||
//
|
||||
// Tests in the bytes and strings packages with input lengths that
|
||||
// are multiples of 64 will break if this condition were removed.
|
||||
CMPQ DI, R13
|
||||
JEQ endavx
|
||||
|
||||
// Load address of the last 64 bytes.
|
||||
// There is an overlap with the previous block.
|
||||
MOVQ R11, DI
|
||||
VMOVDQU (DI), Y2
|
||||
VMOVDQU 32(DI), Y4
|
||||
VPCMPEQB Y1, Y2, Y3
|
||||
VPCMPEQB Y1, Y4, Y5
|
||||
VPMOVMSKB Y3, DX
|
||||
VPMOVMSKB Y5, CX
|
||||
// Exit AVX mode.
|
||||
VZEROUPPER
|
||||
SALQ $32, CX
|
||||
ORQ CX, DX
|
||||
|
||||
// Create mask to ignore overlap between previous 64 byte block
|
||||
// and the next.
|
||||
ANDQ $63, BX
|
||||
MOVQ $64, CX
|
||||
SUBQ BX, CX
|
||||
MOVQ $0xFFFFFFFFFFFFFFFF, R10
|
||||
SALQ CL, R10
|
||||
// Apply mask
|
||||
ANDQ R10, DX
|
||||
POPCNTQ DX, DX
|
||||
ADDQ DX, R12
|
||||
MOVQ R12, (R8)
|
||||
RET
|
||||
endavx:
|
||||
// Exit AVX mode.
|
||||
VZEROUPPER
|
||||
MOVQ R12, (R8)
|
||||
RET
|
||||
43
src/internal/bytealg/count_arm.s
Normal file
43
src/internal/bytealg/count_arm.s
Normal file
@@ -0,0 +1,43 @@
|
||||
// Copyright 2019 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include "go_asm.h"
|
||||
#include "textflag.h"
|
||||
|
||||
TEXT ·Count(SB),NOSPLIT,$0-20
|
||||
MOVW b_base+0(FP), R0
|
||||
MOVW b_len+4(FP), R1
|
||||
MOVBU c+12(FP), R2
|
||||
MOVW $ret+16(FP), R7
|
||||
B countbytebody<>(SB)
|
||||
|
||||
TEXT ·CountString(SB),NOSPLIT,$0-16
|
||||
MOVW s_base+0(FP), R0
|
||||
MOVW s_len+4(FP), R1
|
||||
MOVBU c+8(FP), R2
|
||||
MOVW $ret+12(FP), R7
|
||||
B countbytebody<>(SB)
|
||||
|
||||
// Input:
|
||||
// R0: data
|
||||
// R1: data length
|
||||
// R2: byte to find
|
||||
// R7: address to put result
|
||||
//
|
||||
// On exit:
|
||||
// R4 and R8 are clobbered
|
||||
TEXT countbytebody<>(SB),NOSPLIT,$0
|
||||
MOVW $0, R8 // R8 = count of byte to search
|
||||
CMP $0, R1
|
||||
B.EQ done // short path to handle 0-byte case
|
||||
ADD R0, R1 // R1 is the end of the range
|
||||
byte_loop:
|
||||
MOVBU.P 1(R0), R4
|
||||
CMP R4, R2
|
||||
ADD.EQ $1, R8
|
||||
CMP R0, R1
|
||||
B.NE byte_loop
|
||||
done:
|
||||
MOVW R8, (R7)
|
||||
RET
|
||||
92
src/internal/bytealg/count_arm64.s
Normal file
92
src/internal/bytealg/count_arm64.s
Normal file
@@ -0,0 +1,92 @@
|
||||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include "go_asm.h"
|
||||
#include "textflag.h"
|
||||
|
||||
TEXT ·Count(SB),NOSPLIT,$0-40
|
||||
MOVD b_base+0(FP), R0
|
||||
MOVD b_len+8(FP), R2
|
||||
MOVBU c+24(FP), R1
|
||||
MOVD $ret+32(FP), R8
|
||||
B countbytebody<>(SB)
|
||||
|
||||
TEXT ·CountString(SB),NOSPLIT,$0-32
|
||||
MOVD s_base+0(FP), R0
|
||||
MOVD s_len+8(FP), R2
|
||||
MOVBU c+16(FP), R1
|
||||
MOVD $ret+24(FP), R8
|
||||
B countbytebody<>(SB)
|
||||
|
||||
// input:
|
||||
// R0: data
|
||||
// R2: data len
|
||||
// R1: byte to find
|
||||
// R8: address to put result
|
||||
TEXT countbytebody<>(SB),NOSPLIT,$0
|
||||
// R11 = count of byte to search
|
||||
MOVD $0, R11
|
||||
// short path to handle 0-byte case
|
||||
CBZ R2, done
|
||||
CMP $0x20, R2
|
||||
// jump directly to tail if length < 32
|
||||
BLO tail
|
||||
ANDS $0x1f, R0, R9
|
||||
BEQ chunk
|
||||
// Process the unaligned head one byte at a time until R0 reaches a 32-byte boundary
|
||||
BIC $0x1f, R0, R3
|
||||
ADD $0x20, R3
|
||||
PCALIGN $16
|
||||
head_loop:
|
||||
MOVBU.P 1(R0), R5
|
||||
CMP R5, R1
|
||||
CINC EQ, R11, R11
|
||||
SUB $1, R2, R2
|
||||
CMP R0, R3
|
||||
BNE head_loop
|
||||
// Work with 32-byte aligned chunks
|
||||
chunk:
|
||||
BIC $0x1f, R2, R9
|
||||
// The first chunk can also be the last
|
||||
CBZ R9, tail
|
||||
// R3 = end of 32-byte chunks
|
||||
ADD R0, R9, R3
|
||||
MOVD $1, R5
|
||||
VMOV R5, V5.B16
|
||||
// R2 = length of tail
|
||||
SUB R9, R2, R2
|
||||
// Duplicate R1 (byte to search) to 16 1-byte elements of V0
|
||||
VMOV R1, V0.B16
|
||||
// Clear the low 64-bit element of V7 and V8
|
||||
VEOR V7.B8, V7.B8, V7.B8
|
||||
VEOR V8.B8, V8.B8, V8.B8
|
||||
PCALIGN $16
|
||||
// Count the target byte in 32-byte chunk
|
||||
chunk_loop:
|
||||
VLD1.P (R0), [V1.B16, V2.B16]
|
||||
CMP R0, R3
|
||||
VCMEQ V0.B16, V1.B16, V3.B16
|
||||
VCMEQ V0.B16, V2.B16, V4.B16
|
||||
// Clear the higher 7 bits
|
||||
VAND V5.B16, V3.B16, V3.B16
|
||||
VAND V5.B16, V4.B16, V4.B16
|
||||
// Count the lanes that match the requested byte
|
||||
VADDP V4.B16, V3.B16, V6.B16 // 32B->16B
|
||||
VUADDLV V6.B16, V7
|
||||
// Accumulate the count in low 64-bit element of V8 when inside the loop
|
||||
VADD V7, V8
|
||||
BNE chunk_loop
|
||||
VMOV V8.D[0], R6
|
||||
ADD R6, R11, R11
|
||||
CBZ R2, done
|
||||
tail:
|
||||
// Process the tail (shorter than 32 bytes) one byte at a time
|
||||
MOVBU.P 1(R0), R5
|
||||
SUB $1, R2, R2
|
||||
CMP R5, R1
|
||||
CINC EQ, R11, R11
|
||||
CBNZ R2, tail
|
||||
done:
|
||||
MOVD R11, (R8)
|
||||
RET
|
||||
27
src/internal/bytealg/count_generic.go
Normal file
27
src/internal/bytealg/count_generic.go
Normal file
@@ -0,0 +1,27 @@
|
||||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build !amd64 && !arm && !arm64 && !ppc64le && !ppc64 && !riscv64 && !s390x
|
||||
|
||||
package bytealg
|
||||
|
||||
// Count reports how many bytes in b are equal to c.
// A nil or empty slice yields 0.
func Count(b []byte, c byte) int {
	total := 0
	for i := range b {
		if b[i] != c {
			continue
		}
		total++
	}
	return total
}
|
||||
|
||||
// CountString reports how many bytes of s are equal to c.
// The comparison is byte-wise, not rune-wise; an empty string yields 0.
func CountString(s string, c byte) int {
	matches := 0
	for pos := 0; pos < len(s); pos++ {
		if s[pos] != c {
			continue
		}
		matches++
	}
	return matches
}
|
||||
33
src/internal/bytealg/count_native.go
Normal file
33
src/internal/bytealg/count_native.go
Normal file
@@ -0,0 +1,33 @@
|
||||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build amd64 || arm || arm64 || ppc64le || ppc64 || riscv64 || s390x
|
||||
|
||||
package bytealg
|
||||
|
||||
//go:noescape
|
||||
func Count(b []byte, c byte) int
|
||||
|
||||
//go:noescape
|
||||
func CountString(s string, c byte) int
|
||||
|
||||
// countGeneric is the portable fallback that the assembly implementation
// of Count jumps to. It reports how many bytes in b are equal to c.
func countGeneric(b []byte, c byte) int {
	hits := 0
	for rest := b; len(rest) > 0; rest = rest[1:] {
		if rest[0] == c {
			hits++
		}
	}
	return hits
}
|
||||
// countGenericString is the portable fallback that the assembly
// implementation of CountString jumps to. It reports how many bytes of s
// are equal to c (byte-wise, not rune-wise).
func countGenericString(s string, c byte) int {
	found := 0
	for i := len(s) - 1; i >= 0; i-- {
		if s[i] == c {
			found++
		}
	}
	return found
}
|
||||
154
src/internal/bytealg/count_ppc64x.s
Normal file
154
src/internal/bytealg/count_ppc64x.s
Normal file
@@ -0,0 +1,154 @@
|
||||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build ppc64le || ppc64
|
||||
|
||||
#include "go_asm.h"
|
||||
#include "textflag.h"
|
||||
|
||||
TEXT ·Count<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-40
|
||||
// R3 = byte array pointer
|
||||
// R4 = length
|
||||
// R6 = byte to count
|
||||
MTVRD R6, V1 // move compare byte
|
||||
MOVD R6, R5
|
||||
VSPLTB $7, V1, V1 // replicate byte across V1
|
||||
BR countbytebody<>(SB)
|
||||
|
||||
TEXT ·CountString<ABIInternal>(SB), NOSPLIT|NOFRAME, $0-32
|
||||
// R3 = byte array pointer
|
||||
// R4 = length
|
||||
// R5 = byte to count
|
||||
MTVRD R5, V1 // move compare byte
|
||||
VSPLTB $7, V1, V1 // replicate byte across V1
|
||||
BR countbytebody<>(SB)
|
||||
|
||||
// R3: addr of string
|
||||
// R4: len of string
|
||||
// R5: byte to count
|
||||
// V1: byte to count, splatted.
|
||||
// On exit:
|
||||
// R3: return value
|
||||
TEXT countbytebody<>(SB), NOSPLIT|NOFRAME, $0-0
|
||||
MOVD $0, R18 // byte count
|
||||
|
||||
#ifndef GOPPC64_power10
|
||||
RLDIMI $8, R5, $48, R5
|
||||
RLDIMI $16, R5, $32, R5
|
||||
RLDIMI $32, R5, $0, R5 // fill reg with the byte to count
|
||||
#endif
|
||||
|
||||
CMPU R4, $32 // Check if it's a small string (<32 bytes)
|
||||
BLT tail // Jump to the small string case
|
||||
SRD $5, R4, R20
|
||||
MOVD R20, CTR
|
||||
MOVD $16, R21
|
||||
XXLXOR V4, V4, V4
|
||||
XXLXOR V5, V5, V5
|
||||
|
||||
PCALIGN $16
|
||||
cmploop:
|
||||
LXVD2X (R0)(R3), V0 // Count 32B per loop with two vector accumulators.
|
||||
LXVD2X (R21)(R3), V2
|
||||
VCMPEQUB V2, V1, V2
|
||||
VCMPEQUB V0, V1, V0
|
||||
VPOPCNTD V2, V2 // A match is 0xFF or 0. Count the bits into doubleword buckets.
|
||||
VPOPCNTD V0, V0
|
||||
VADDUDM V0, V4, V4 // Accumulate the popcounts. They are 8x the count.
|
||||
VADDUDM V2, V5, V5 // The count will be fixed up afterwards.
|
||||
ADD $32, R3
|
||||
BDNZ cmploop
|
||||
|
||||
VADDUDM V4, V5, V5
|
||||
MFVSRD V5, R18
|
||||
VSLDOI $8, V5, V5, V5
|
||||
MFVSRD V5, R21
|
||||
ADD R21, R18, R18
|
||||
ANDCC $31, R4, R4
|
||||
// Skip the tail processing if no bytes remaining.
|
||||
BEQ tail_0
|
||||
|
||||
#ifdef GOPPC64_power10
|
||||
SRD $3, R18, R18 // Fix the vector loop count before counting the tail on P10.
|
||||
|
||||
tail: // Count the last 0 - 31 bytes.
|
||||
CMP R4, $16
|
||||
BLE small_tail_p10
|
||||
LXV 0(R3), V0
|
||||
VCMPEQUB V0, V1, V0
|
||||
VCNTMBB V0, $1, R14 // Sum the value of bit 0 of each byte of the compare into R14.
|
||||
SRD $56, R14, R14 // The result of VCNTMBB is shifted. Unshift it.
|
||||
ADD R14, R18, R18
|
||||
ADD $16, R3, R3
|
||||
ANDCC $15, R4, R4
|
||||
|
||||
small_tail_p10:
|
||||
SLD $56, R4, R6
|
||||
LXVLL R3, R6, V0
|
||||
VCMPEQUB V0, V1, V0
|
||||
VCLRRB V0, R4, V0 // If <16B being compared, clear matches of the 16-R4 bytes.
|
||||
VCNTMBB V0, $1, R14 // Sum the value of bit 0 of each byte of the compare into R14.
|
||||
SRD $56, R14, R14 // The result of VCNTMBB is shifted. Unshift it.
|
||||
ADD R14, R18, R3
|
||||
RET
|
||||
|
||||
#else
|
||||
tail: // Count the last 0 - 31 bytes.
|
||||
CMP R4, $16
|
||||
BLT tail_8
|
||||
MOVD (R3), R12
|
||||
MOVD 8(R3), R14
|
||||
CMPB R12, R5, R12
|
||||
CMPB R14, R5, R14
|
||||
POPCNTD R12, R12
|
||||
POPCNTD R14, R14
|
||||
ADD R12, R18, R18
|
||||
ADD R14, R18, R18
|
||||
ADD $16, R3, R3
|
||||
ADD $-16, R4, R4
|
||||
|
||||
tail_8: // Count the remaining 0 - 15 bytes.
|
||||
CMP R4, $8
|
||||
BLT tail_4
|
||||
MOVD (R3), R12
|
||||
CMPB R12, R5, R12
|
||||
POPCNTD R12, R12
|
||||
ADD R12, R18, R18
|
||||
ADD $8, R3, R3
|
||||
ADD $-8, R4, R4
|
||||
|
||||
tail_4: // Count the remaining 0 - 7 bytes.
|
||||
CMP R4, $4
|
||||
BLT tail_2
|
||||
MOVWZ (R3), R12
|
||||
CMPB R12, R5, R12
|
||||
SLD $32, R12, R12 // Remove non-participating matches.
|
||||
POPCNTD R12, R12
|
||||
ADD R12, R18, R18
|
||||
ADD $4, R3, R3
|
||||
ADD $-4, R4, R4
|
||||
|
||||
tail_2: // Count the remaining 0 - 3 bytes.
|
||||
CMP R4, $2
|
||||
BLT tail_1
|
||||
MOVHZ (R3), R12
|
||||
CMPB R12, R5, R12
|
||||
SLD $48, R12, R12 // Remove non-participating matches.
|
||||
POPCNTD R12, R12
|
||||
ADD R12, R18, R18
|
||||
ADD $2, R3, R3
|
||||
ADD $-2, R4, R4
|
||||
|
||||
tail_1: // Count the remaining 0 - 1 bytes.
|
||||
CMP R4, $1
|
||||
BLT tail_0
|
||||
MOVBZ (R3), R12
|
||||
CMPB R12, R5, R12
|
||||
ANDCC $0x8, R12, R12
|
||||
ADD R12, R18, R18
|
||||
#endif
|
||||
|
||||
tail_0: // No remaining tail to count.
|
||||
SRD $3, R18, R3 // Fixup count, it is off by 8x.
|
||||
RET
|
||||
49
src/internal/bytealg/count_riscv64.s
Normal file
49
src/internal/bytealg/count_riscv64.s
Normal file
@@ -0,0 +1,49 @@
|
||||
// Copyright 2020 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include "go_asm.h"
|
||||
#include "textflag.h"
|
||||
|
||||
// Count counts the bytes in b that are equal to c, scanning one byte at a
// time, and leaves the count in X10.
TEXT ·Count<ABIInternal>(SB),NOSPLIT,$0-40
|
||||
// X10 = b_base
|
||||
// X11 = b_len
|
||||
// X12 = b_cap (unused)
|
||||
// X13 = byte to count (want in X12)
|
||||
AND $0xff, X13, X12 // X12 = low 8 bits of the byte to count
|
||||
MOV ZERO, X14 // count
|
||||
ADD X10, X11 // end
|
||||
|
||||
PCALIGN $16
|
||||
loop:
|
||||
BEQ X10, X11, done // stop once the cursor reaches the end address
|
||||
MOVBU (X10), X15 // load the current byte
|
||||
ADD $1, X10 // advance the cursor
|
||||
BNE X12, X15, loop // no match: go straight to the next byte
|
||||
ADD $1, X14 // match: bump the count
|
||||
JMP loop
|
||||
|
||||
done:
|
||||
MOV X14, X10 // move the count into X10
|
||||
RET
|
||||
|
||||
// CountString counts the bytes in s that are equal to c, scanning one byte
// at a time, and leaves the count in X10.
TEXT ·CountString<ABIInternal>(SB),NOSPLIT,$0-32
|
||||
// X10 = s_base
|
||||
// X11 = s_len
|
||||
// X12 = byte to count
|
||||
AND $0xff, X12 // keep only the low 8 bits of the byte to count
|
||||
MOV ZERO, X14 // count
|
||||
ADD X10, X11 // end
|
||||
|
||||
PCALIGN $16
|
||||
loop:
|
||||
BEQ X10, X11, done // stop once the cursor reaches the end address
|
||||
MOVBU (X10), X15 // load the current byte
|
||||
ADD $1, X10 // advance the cursor
|
||||
BNE X12, X15, loop // no match: go straight to the next byte
|
||||
ADD $1, X14 // match: bump the count
|
||||
JMP loop
|
||||
|
||||
done:
|
||||
MOV X14, X10 // move the count into X10
|
||||
RET
|
||||
169
src/internal/bytealg/count_s390x.s
Normal file
169
src/internal/bytealg/count_s390x.s
Normal file
@@ -0,0 +1,169 @@
|
||||
// Copyright 2019 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include "go_asm.h"
|
||||
#include "textflag.h"
|
||||
|
||||
// condition code masks
|
||||
#define EQ 8
|
||||
#define NE 7
|
||||
|
||||
// register assignments
|
||||
#define R_ZERO R0
|
||||
#define R_VAL R1
|
||||
#define R_TMP R2
|
||||
#define R_PTR R3
|
||||
#define R_LEN R4
|
||||
#define R_CHAR R5
|
||||
#define R_RET R6
|
||||
#define R_ITER R7
|
||||
#define R_CNT R8
|
||||
#define R_MPTR R9
|
||||
|
||||
// vector register assignments
|
||||
#define V_ZERO V0
|
||||
#define V_CHAR V1
|
||||
#define V_MASK V2
|
||||
#define V_VAL V3
|
||||
#define V_CNT V4
|
||||
|
||||
// mask for trailing bytes in vector implementation
|
||||
GLOBL countbytemask<>(SB), RODATA, $16
|
||||
DATA countbytemask<>+0(SB)/8, $0x0101010101010101
|
||||
DATA countbytemask<>+8(SB)/8, $0x0101010101010101
|
||||
|
||||
// func Count(b []byte, c byte) int
|
||||
TEXT ·Count(SB), NOSPLIT|NOFRAME, $0-40
|
||||
LMG b+0(FP), R_PTR, R_LEN
|
||||
MOVBZ c+24(FP), R_CHAR
|
||||
MOVD $ret+32(FP), R_RET
|
||||
BR countbytebody<>(SB)
|
||||
|
||||
// func CountString(s string, c byte) int
|
||||
TEXT ·CountString(SB), NOSPLIT|NOFRAME, $0-32
|
||||
LMG s+0(FP), R_PTR, R_LEN
|
||||
MOVBZ c+16(FP), R_CHAR
|
||||
MOVD $ret+24(FP), R_RET
|
||||
BR countbytebody<>(SB)
|
||||
|
||||
// input:
|
||||
// R_PTR = address of array of bytes
|
||||
// R_LEN = number of bytes in array
|
||||
// R_CHAR = byte value to count (zero-extended to register width)
|
||||
// R_RET = address of return value
|
||||
TEXT countbytebody<>(SB), NOSPLIT|NOFRAME, $0-0
|
||||
MOVD $internal∕cpu·S390X+const_offsetS390xHasVX(SB), R_TMP
|
||||
MOVD $countbytemask<>(SB), R_MPTR
|
||||
CGIJ $EQ, R_LEN, $0, ret0 // return if length is 0.
|
||||
SRD $4, R_LEN, R_ITER // R_ITER is the number of 16-byte chunks
|
||||
MOVBZ (R_TMP), R_TMP // load bool indicating support for vector facility
|
||||
CGIJ $EQ, R_TMP, $0, novx // jump to scalar code if the vector facility is not available
|
||||
|
||||
// Start of vector code (have vector facility).
|
||||
//
|
||||
// Set R_LEN to be the length mod 16 minus 1 to use as an index for
|
||||
// vector 'load with length' (VLL). It will be in the range [-1,14].
|
||||
// Also replicate c across a 16-byte vector and initialize V_ZERO.
|
||||
ANDW $0xf, R_LEN
|
||||
VLVGB $0, R_CHAR, V_CHAR // V_CHAR = [16]byte{c, 0, ..., 0, 0}
|
||||
VZERO V_ZERO // V_ZERO = [1]uint128{0}
|
||||
ADDW $-1, R_LEN
|
||||
VREPB $0, V_CHAR, V_CHAR // V_CHAR = [16]byte{c, c, ..., c, c}
|
||||
|
||||
// Jump to loop if we have more than 15 bytes to process.
|
||||
CGIJ $NE, R_ITER, $0, vxchunks
|
||||
|
||||
// Load 1-15 bytes and corresponding mask.
|
||||
// Note: only the low 32-bits of R_LEN are used for the index.
|
||||
VLL R_LEN, (R_PTR), V_VAL
|
||||
VLL R_LEN, (R_MPTR), V_MASK
|
||||
|
||||
// Compare each byte in input chunk against byte to be counted.
|
||||
// Each byte element will be set to either 0 (no match) or 1 (match).
|
||||
VCEQB V_CHAR, V_VAL, V_VAL // each byte will be either 0xff or 0x00
|
||||
VN V_MASK, V_VAL, V_VAL // mask out most significant 7 bits
|
||||
|
||||
// Accumulate matched byte count in 128-bit integer value.
|
||||
VSUMB V_VAL, V_ZERO, V_VAL // [16]byte{x0, x1, ..., x14, x15} → [4]uint32{x0+x1+x2+x3, ..., x12+x13+x14+x15}
|
||||
VSUMQF V_VAL, V_ZERO, V_CNT // [4]uint32{x0, x1, x2, x3} → [1]uint128{x0+x1+x2+x3}
|
||||
|
||||
// Return rightmost (lowest) 64-bit part of accumulator.
|
||||
VSTEG $1, V_CNT, (R_RET)
|
||||
RET
|
||||
|
||||
vxchunks:
|
||||
// Load 0x01 into every byte element in the 16-byte mask vector.
|
||||
VREPIB $1, V_MASK // V_MASK = [16]byte{1, 1, ..., 1, 1}
|
||||
VZERO V_CNT // initial uint128 count of 0
|
||||
|
||||
vxloop:
|
||||
// Load input bytes in 16-byte chunks.
|
||||
VL (R_PTR), V_VAL
|
||||
|
||||
// Compare each byte in input chunk against byte to be counted.
|
||||
// Each byte element will be set to either 0 (no match) or 1 (match).
|
||||
VCEQB V_CHAR, V_VAL, V_VAL // each byte will be either 0xff or 0x00
|
||||
VN V_MASK, V_VAL, V_VAL // mask out most significant 7 bits
|
||||
|
||||
// Increment input string address.
|
||||
MOVD $16(R_PTR), R_PTR
|
||||
|
||||
// Accumulate matched byte count in 128-bit integer value.
|
||||
VSUMB V_VAL, V_ZERO, V_VAL // [16]byte{x0, x1, ..., x14, x15} → [4]uint32{x0+x1+x2+x3, ..., x12+x13+x14+x15}
|
||||
VSUMQF V_VAL, V_ZERO, V_VAL // [4]uint32{x0, x1, x2, x3} → [1]uint128{x0+x1+x2+x3}
|
||||
VAQ V_VAL, V_CNT, V_CNT // accumulate
|
||||
|
||||
// Repeat until all 16-byte chunks are done.
|
||||
BRCTG R_ITER, vxloop
|
||||
|
||||
// Skip to end if there are no trailing bytes.
|
||||
CIJ $EQ, R_LEN, $-1, vxret
|
||||
|
||||
// Load 1-15 bytes and corresponding mask.
|
||||
// Note: only the low 32-bits of R_LEN are used for the index.
|
||||
VLL R_LEN, (R_PTR), V_VAL
|
||||
VLL R_LEN, (R_MPTR), V_MASK
|
||||
|
||||
// Compare each byte in input chunk against byte to be counted.
|
||||
// Each byte element will be set to either 0 (no match) or 1 (match).
|
||||
VCEQB V_CHAR, V_VAL, V_VAL
|
||||
VN V_MASK, V_VAL, V_VAL
|
||||
|
||||
// Accumulate matched byte count in 128-bit integer value.
|
||||
VSUMB V_VAL, V_ZERO, V_VAL // [16]byte{x0, x1, ..., x14, x15} → [4]uint32{x0+x1+x2+x3, ..., x12+x13+x14+x15}
|
||||
VSUMQF V_VAL, V_ZERO, V_VAL // [4]uint32{x0, x1, x2, x3} → [1]uint128{x0+x1+x2+x3}
|
||||
VAQ V_VAL, V_CNT, V_CNT // accumulate
|
||||
|
||||
vxret:
|
||||
// Return rightmost (lowest) 64-bit part of accumulator.
|
||||
VSTEG $1, V_CNT, (R_RET)
|
||||
RET
|
||||
|
||||
novx:
|
||||
// Start of non-vector code (the vector facility is not available).
|
||||
//
|
||||
// Initialise counter and constant zero.
|
||||
MOVD $0, R_CNT
|
||||
MOVD $0, R_ZERO
|
||||
|
||||
loop:
|
||||
// Read 1-byte from input and compare.
|
||||
// Note: avoid putting LOCGR in critical path.
|
||||
MOVBZ (R_PTR), R_VAL
|
||||
MOVD $1, R_TMP
|
||||
MOVD $1(R_PTR), R_PTR
|
||||
CMPW R_VAL, R_CHAR
|
||||
LOCGR $NE, R_ZERO, R_TMP // select 0 if no match (1 if there is a match)
|
||||
ADD R_TMP, R_CNT // accumulate 64-bit result
|
||||
|
||||
// Repeat until all bytes have been checked.
|
||||
BRCTG R_LEN, loop
|
||||
|
||||
ret:
|
||||
MOVD R_CNT, (R_RET)
|
||||
RET
|
||||
|
||||
ret0:
|
||||
MOVD $0, (R_RET)
|
||||
RET
|
||||
130
src/internal/bytealg/equal_386.s
Normal file
130
src/internal/bytealg/equal_386.s
Normal file
@@ -0,0 +1,130 @@
|
||||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include "go_asm.h"
|
||||
#include "textflag.h"
|
||||
|
||||
// memequal(a, b unsafe.Pointer, size uintptr) bool
|
||||
TEXT runtime·memequal(SB),NOSPLIT,$0-13
|
||||
MOVL a+0(FP), SI
|
||||
MOVL b+4(FP), DI
|
||||
CMPL SI, DI
|
||||
JEQ eq
|
||||
MOVL size+8(FP), BX
|
||||
LEAL ret+12(FP), AX
|
||||
JMP memeqbody<>(SB)
|
||||
eq:
|
||||
MOVB $1, ret+12(FP)
|
||||
RET
|
||||
|
||||
// memequal_varlen(a, b unsafe.Pointer) bool
|
||||
TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-9
|
||||
MOVL a+0(FP), SI
|
||||
MOVL b+4(FP), DI
|
||||
CMPL SI, DI
|
||||
JEQ eq
|
||||
MOVL 4(DX), BX // compiler stores size at offset 4 in the closure
|
||||
LEAL ret+8(FP), AX
|
||||
JMP memeqbody<>(SB)
|
||||
eq:
|
||||
MOVB $1, ret+8(FP)
|
||||
RET
|
||||
|
||||
// a in SI
|
||||
// b in DI
|
||||
// count in BX
|
||||
// address of result byte in AX
|
||||
TEXT memeqbody<>(SB),NOSPLIT,$0-0
|
||||
CMPL BX, $4
|
||||
JB small
|
||||
|
||||
// 64 bytes at a time using xmm registers
|
||||
hugeloop:
|
||||
CMPL BX, $64
|
||||
JB bigloop
|
||||
#ifdef GO386_softfloat
|
||||
JMP bigloop
|
||||
#endif
|
||||
MOVOU (SI), X0
|
||||
MOVOU (DI), X1
|
||||
MOVOU 16(SI), X2
|
||||
MOVOU 16(DI), X3
|
||||
MOVOU 32(SI), X4
|
||||
MOVOU 32(DI), X5
|
||||
MOVOU 48(SI), X6
|
||||
MOVOU 48(DI), X7
|
||||
PCMPEQB X1, X0
|
||||
PCMPEQB X3, X2
|
||||
PCMPEQB X5, X4
|
||||
PCMPEQB X7, X6
|
||||
PAND X2, X0
|
||||
PAND X6, X4
|
||||
PAND X4, X0
|
||||
PMOVMSKB X0, DX
|
||||
ADDL $64, SI
|
||||
ADDL $64, DI
|
||||
SUBL $64, BX
|
||||
CMPL DX, $0xffff
|
||||
JEQ hugeloop
|
||||
MOVB $0, (AX)
|
||||
RET
|
||||
|
||||
// 4 bytes at a time using 32-bit register
|
||||
bigloop:
|
||||
CMPL BX, $4
|
||||
JBE leftover
|
||||
MOVL (SI), CX
|
||||
MOVL (DI), DX
|
||||
ADDL $4, SI
|
||||
ADDL $4, DI
|
||||
SUBL $4, BX
|
||||
CMPL CX, DX
|
||||
JEQ bigloop
|
||||
MOVB $0, (AX)
|
||||
RET
|
||||
|
||||
// remaining 0-4 bytes
|
||||
leftover:
|
||||
MOVL -4(SI)(BX*1), CX
|
||||
MOVL -4(DI)(BX*1), DX
|
||||
CMPL CX, DX
|
||||
SETEQ (AX)
|
||||
RET
|
||||
|
||||
small:
|
||||
CMPL BX, $0
|
||||
JEQ equal
|
||||
|
||||
LEAL 0(BX*8), CX
|
||||
NEGL CX
|
||||
|
||||
MOVL SI, DX
|
||||
CMPB DX, $0xfc
|
||||
JA si_high
|
||||
|
||||
// load at SI won't cross a page boundary.
|
||||
MOVL (SI), SI
|
||||
JMP si_finish
|
||||
si_high:
|
||||
// low byte of the address is above 0xfc, so a 4-byte load at SI could cross a page boundary. Load the 4 bytes ending at the last byte we want, then shift them into position.
|
||||
MOVL -4(SI)(BX*1), SI
|
||||
SHRL CX, SI
|
||||
si_finish:
|
||||
|
||||
// same for DI.
|
||||
MOVL DI, DX
|
||||
CMPB DX, $0xfc
|
||||
JA di_high
|
||||
MOVL (DI), DI
|
||||
JMP di_finish
|
||||
di_high:
|
||||
MOVL -4(DI)(BX*1), DI
|
||||
SHRL CX, DI
|
||||
di_finish:
|
||||
|
||||
SUBL SI, DI
|
||||
SHLL CX, DI
|
||||
equal:
|
||||
SETEQ (AX)
|
||||
RET
|
||||
165
src/internal/bytealg/equal_amd64.s
Normal file
165
src/internal/bytealg/equal_amd64.s
Normal file
@@ -0,0 +1,165 @@
|
||||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include "go_asm.h"
|
||||
#include "asm_amd64.h"
|
||||
#include "textflag.h"
|
||||
|
||||
// memequal(a, b unsafe.Pointer, size uintptr) bool
|
||||
TEXT runtime·memequal<ABIInternal>(SB),NOSPLIT,$0-25
|
||||
// AX = a (want in SI)
|
||||
// BX = b (want in DI)
|
||||
// CX = size (want in BX)
|
||||
CMPQ AX, BX
|
||||
JNE neq
|
||||
MOVQ $1, AX // return 1
|
||||
RET
|
||||
neq:
|
||||
MOVQ AX, SI
|
||||
MOVQ BX, DI
|
||||
MOVQ CX, BX
|
||||
JMP memeqbody<>(SB)
|
||||
|
||||
// memequal_varlen(a, b unsafe.Pointer) bool
|
||||
TEXT runtime·memequal_varlen<ABIInternal>(SB),NOSPLIT,$0-17
|
||||
// AX = a (want in SI)
|
||||
// BX = b (want in DI)
|
||||
// 8(DX) = size (want in BX)
|
||||
CMPQ AX, BX
|
||||
JNE neq
|
||||
MOVQ $1, AX // return 1
|
||||
RET
|
||||
neq:
|
||||
MOVQ AX, SI
|
||||
MOVQ BX, DI
|
||||
MOVQ 8(DX), BX // compiler stores size at offset 8 in the closure
|
||||
JMP memeqbody<>(SB)
|
||||
|
||||
// Input:
|
||||
// a in SI
|
||||
// b in DI
|
||||
// count in BX
|
||||
// Output:
|
||||
// result in AX
|
||||
TEXT memeqbody<>(SB),NOSPLIT,$0-0
|
||||
CMPQ BX, $8
|
||||
JB small
|
||||
CMPQ BX, $64
|
||||
JB bigloop
|
||||
#ifndef hasAVX2
|
||||
CMPB internal∕cpu·X86+const_offsetX86HasAVX2(SB), $1
|
||||
JE hugeloop_avx2
|
||||
|
||||
// 64 bytes at a time using xmm registers
|
||||
PCALIGN $16
|
||||
hugeloop:
|
||||
CMPQ BX, $64
|
||||
JB bigloop
|
||||
MOVOU (SI), X0
|
||||
MOVOU (DI), X1
|
||||
MOVOU 16(SI), X2
|
||||
MOVOU 16(DI), X3
|
||||
MOVOU 32(SI), X4
|
||||
MOVOU 32(DI), X5
|
||||
MOVOU 48(SI), X6
|
||||
MOVOU 48(DI), X7
|
||||
PCMPEQB X1, X0
|
||||
PCMPEQB X3, X2
|
||||
PCMPEQB X5, X4
|
||||
PCMPEQB X7, X6
|
||||
PAND X2, X0
|
||||
PAND X6, X4
|
||||
PAND X4, X0
|
||||
PMOVMSKB X0, DX
|
||||
ADDQ $64, SI
|
||||
ADDQ $64, DI
|
||||
SUBQ $64, BX
|
||||
CMPL DX, $0xffff
|
||||
JEQ hugeloop
|
||||
XORQ AX, AX // return 0
|
||||
RET
|
||||
#endif
|
||||
|
||||
// 64 bytes at a time using ymm registers
|
||||
PCALIGN $16
|
||||
hugeloop_avx2:
|
||||
CMPQ BX, $64
|
||||
JB bigloop_avx2
|
||||
VMOVDQU (SI), Y0
|
||||
VMOVDQU (DI), Y1
|
||||
VMOVDQU 32(SI), Y2
|
||||
VMOVDQU 32(DI), Y3
|
||||
VPCMPEQB Y1, Y0, Y4
|
||||
VPCMPEQB Y2, Y3, Y5
|
||||
VPAND Y4, Y5, Y6
|
||||
VPMOVMSKB Y6, DX
|
||||
ADDQ $64, SI
|
||||
ADDQ $64, DI
|
||||
SUBQ $64, BX
|
||||
CMPL DX, $0xffffffff
|
||||
JEQ hugeloop_avx2
|
||||
VZEROUPPER
|
||||
XORQ AX, AX // return 0
|
||||
RET
|
||||
|
||||
bigloop_avx2:
|
||||
VZEROUPPER
|
||||
|
||||
// 8 bytes at a time using 64-bit register
|
||||
PCALIGN $16
|
||||
bigloop:
|
||||
CMPQ BX, $8
|
||||
JBE leftover
|
||||
MOVQ (SI), CX
|
||||
MOVQ (DI), DX
|
||||
ADDQ $8, SI
|
||||
ADDQ $8, DI
|
||||
SUBQ $8, BX
|
||||
CMPQ CX, DX
|
||||
JEQ bigloop
|
||||
XORQ AX, AX // return 0
|
||||
RET
|
||||
|
||||
// remaining 0-8 bytes
|
||||
leftover:
|
||||
MOVQ -8(SI)(BX*1), CX
|
||||
MOVQ -8(DI)(BX*1), DX
|
||||
CMPQ CX, DX
|
||||
SETEQ AX
|
||||
RET
|
||||
|
||||
small:
|
||||
CMPQ BX, $0
|
||||
JEQ equal
|
||||
|
||||
LEAQ 0(BX*8), CX
|
||||
NEGQ CX
|
||||
|
||||
CMPB SI, $0xf8
|
||||
JA si_high
|
||||
|
||||
// load at SI won't cross a page boundary.
|
||||
MOVQ (SI), SI
|
||||
JMP si_finish
|
||||
si_high:
|
||||
// low byte of the address is above 0xf8, so an 8-byte load at SI could cross a page boundary. Load the 8 bytes ending at the last byte we want, then shift them into position.
|
||||
MOVQ -8(SI)(BX*1), SI
|
||||
SHRQ CX, SI
|
||||
si_finish:
|
||||
|
||||
// same for DI.
|
||||
CMPB DI, $0xf8
|
||||
JA di_high
|
||||
MOVQ (DI), DI
|
||||
JMP di_finish
|
||||
di_high:
|
||||
MOVQ -8(DI)(BX*1), DI
|
||||
SHRQ CX, DI
|
||||
di_finish:
|
||||
|
||||
SUBQ SI, DI
|
||||
SHLQ CX, DI
|
||||
equal:
|
||||
SETEQ AX
|
||||
RET
|
||||
91
src/internal/bytealg/equal_arm.s
Normal file
91
src/internal/bytealg/equal_arm.s
Normal file
@@ -0,0 +1,91 @@
|
||||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include "go_asm.h"
|
||||
#include "textflag.h"
|
||||
|
||||
// memequal(a, b unsafe.Pointer, size uintptr) bool
//
// Reports whether the size bytes starting at a and at b are identical.
// Arguments and result are on the stack: a+0, b+4, size+8, ret+12.
// Identical pointers and size == 0 are answered here with true; every
// other case tail-branches to memeqbody with R0=a, R1=size, R2=b and
// R7 = address of the result byte, which memeqbody writes before returning.
|
||||
TEXT runtime·memequal(SB),NOSPLIT|NOFRAME,$0-13
|
||||
MOVW a+0(FP), R0
|
||||
MOVW b+4(FP), R2
|
||||
// same address => trivially equal, no need to read size
CMP R0, R2
|
||||
B.EQ eq
|
||||
MOVW size+8(FP), R1
|
||||
CMP $0, R1
|
||||
B.EQ eq // short path to handle 0-byte case
|
||||
// R7 = address (not value) of the result slot for memeqbody to store into
MOVW $ret+12(FP), R7
|
||||
B memeqbody<>(SB)
|
||||
eq:
|
||||
MOVW $1, R0
|
||||
MOVB R0, ret+12(FP)
|
||||
RET
|
||||
|
||||
// memequal_varlen(a, b unsafe.Pointer) bool
//
// Same contract as memequal, except that the byte count is not an
// argument: it is read from offset 4 of the closure that R7 points to on
// entry. R7 is then overwritten with the address of the result byte
// (ret+8) before tail-branching to memeqbody.
|
||||
TEXT runtime·memequal_varlen(SB),NOSPLIT|NOFRAME,$0-9
|
||||
MOVW a+0(FP), R0
|
||||
MOVW b+4(FP), R2
|
||||
CMP R0, R2
|
||||
B.EQ eq
|
||||
// must happen before R7 is repurposed below
MOVW 4(R7), R1 // compiler stores size at offset 4 in the closure
|
||||
CMP $0, R1
|
||||
B.EQ eq // short path to handle 0-byte case
|
||||
MOVW $ret+8(FP), R7
|
||||
B memeqbody<>(SB)
|
||||
eq:
|
||||
MOVW $1, R0
|
||||
MOVB R0, ret+8(FP)
|
||||
RET
|
||||
|
||||
// memeqbody compares R1 bytes at R0 with R1 bytes at R2 and stores
// 1 (equal) or 0 (different) into the byte that R7 points to.
// Callers in this file never pass a zero length.
//
// Strategy: a 1-byte special case; a byte-at-a-time loop when the length
// is below 4 or either pointer is not 4-byte aligned; otherwise a
// word-at-a-time loop followed by the byte loop for any remainder.
//
// Input:
|
||||
// R0: data of a
|
||||
// R1: length
|
||||
// R2: data of b
|
||||
// R7: points to return value
|
||||
//
|
||||
// On exit:
|
||||
// R4, R5 and R6 are clobbered
// (R0, R1 and R2 are modified as well: R0/R2 are advanced as cursors,
// R1 becomes the end address of a, and R0 finally holds the result value)
|
||||
TEXT memeqbody<>(SB),NOSPLIT|NOFRAME,$0-0
|
||||
CMP $1, R1
|
||||
B.EQ one // 1-byte special case for better performance
|
||||
|
||||
// The flags from this CMP are consumed by the B.LT two lines down;
// the ADD in between is written without a flag-setting suffix.
CMP $4, R1
|
||||
ADD R0, R1 // R1 is the end of the range to compare
|
||||
B.LT byte_loop // length < 4
|
||||
AND $3, R0, R6
|
||||
CMP $0, R6
|
||||
B.NE byte_loop // unaligned a, use byte-wise compare (TODO: try to align a)
|
||||
AND $3, R2, R6
|
||||
CMP $0, R6
|
||||
B.NE byte_loop // unaligned b, use byte-wise compare
|
||||
// R6 = end address rounded down to a multiple of 4: where the word loop stops
AND $0xfffffffc, R1, R6
|
||||
// length >= 4
|
||||
chunk4_loop:
|
||||
// post-increment loads: R0 and R2 advance by 4 each iteration
MOVW.P 4(R0), R4
|
||||
MOVW.P 4(R2), R5
|
||||
CMP R4, R5
|
||||
B.NE notequal
|
||||
CMP R0, R6
|
||||
B.NE chunk4_loop
|
||||
CMP R0, R1
|
||||
B.EQ equal // reached the end
|
||||
byte_loop:
|
||||
MOVBU.P 1(R0), R4
|
||||
MOVBU.P 1(R2), R5
|
||||
CMP R4, R5
|
||||
B.NE notequal
|
||||
CMP R0, R1
|
||||
B.NE byte_loop
|
||||
equal:
|
||||
MOVW $1, R0
|
||||
MOVB R0, (R7)
|
||||
RET
|
||||
one:
|
||||
MOVBU (R0), R4
|
||||
MOVBU (R2), R5
|
||||
CMP R4, R5
|
||||
B.EQ equal
|
||||
// falls through to notequal when the single bytes differ
notequal:
|
||||
MOVW $0, R0
|
||||
MOVB R0, (R7)
|
||||
RET
|
||||
124
src/internal/bytealg/equal_arm64.s
Normal file
124
src/internal/bytealg/equal_arm64.s
Normal file
@@ -0,0 +1,124 @@
|
||||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include "go_asm.h"
|
||||
#include "textflag.h"
|
||||
|
||||
// memequal(a, b unsafe.Pointer, size uintptr) bool
//
// Register-based entry point: a in R0, b in R1, size in R2 (the layout
// memeqbody documents as its input), result returned in R0.
// A zero size or identical pointers yield true without touching memory;
// anything else tail-branches to memeqbody.
|
||||
TEXT runtime·memequal<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-25
|
||||
// short path to handle 0-byte case
|
||||
CBZ R2, equal
|
||||
// short path to handle equal pointers
|
||||
CMP R0, R1
|
||||
BEQ equal
|
||||
B memeqbody<>(SB)
|
||||
equal:
|
||||
MOVD $1, R0
|
||||
RET
|
||||
|
||||
// memequal_varlen(a, b unsafe.Pointer) bool
//
// Like memequal, but the byte count is loaded from offset 8 of the
// closure addressed by R26 instead of being passed as an argument.
// a is in R0, b in R1; the result is returned in R0.
|
||||
TEXT runtime·memequal_varlen<ABIInternal>(SB),NOSPLIT,$0-17
|
||||
CMP R0, R1
|
||||
BEQ eq
|
||||
MOVD 8(R26), R2 // compiler stores size at offset 8 in the closure
|
||||
// zero-length comparison is trivially true
CBZ R2, eq
|
||||
B memeqbody<>(SB)
|
||||
eq:
|
||||
MOVD $1, R0
|
||||
RET
|
||||
|
||||
// memeqbody compares R2 bytes at R0 with R2 bytes at R1.
// Both callers in this file filter out a zero length and identical pointers.
//
// Strategy, from largest to smallest unit:
//   - 64-byte chunks using four 128-bit vector loads per side,
//   - 16-byte chunks using load-pair,
//   - a tail of fewer than 16 bytes handled by testing the bits of the
//     remaining length; the 8..15 and 4..7 byte cases use two loads that
//     may overlap instead of a loop.
//
// input:
|
||||
// R0: pointer a
|
||||
// R1: pointer b
|
||||
// R2: data len
|
||||
// at return: result in R0
// R3-R7, R9 and V0-V11 are used as scratch.
|
||||
TEXT memeqbody<>(SB),NOSPLIT,$0
|
||||
CMP $1, R2
|
||||
// handle 1-byte special case for better performance
|
||||
BEQ one
|
||||
CMP $16, R2
|
||||
// handle specially if length < 16
|
||||
BLO tail
|
||||
// R3 = length rounded down to a multiple of 64
BIC $0x3f, R2, R3
|
||||
CBZ R3, chunk16
|
||||
// work with 64-byte chunks
|
||||
ADD R3, R0, R6 // end of chunks
|
||||
chunk64_loop:
|
||||
// post-increment loads: R0 and R1 advance by 64
VLD1.P (R0), [V0.D2, V1.D2, V2.D2, V3.D2]
|
||||
VLD1.P (R1), [V4.D2, V5.D2, V6.D2, V7.D2]
|
||||
VCMEQ V0.D2, V4.D2, V8.D2
|
||||
VCMEQ V1.D2, V5.D2, V9.D2
|
||||
VCMEQ V2.D2, V6.D2, V10.D2
|
||||
VCMEQ V3.D2, V7.D2, V11.D2
|
||||
// fold the four comparison masks into V8
VAND V8.B16, V9.B16, V8.B16
|
||||
VAND V8.B16, V10.B16, V8.B16
|
||||
VAND V8.B16, V11.B16, V8.B16
|
||||
// the flags set here are consumed by the BNE at the bottom of the loop
CMP R0, R6
|
||||
VMOV V8.D[0], R4
|
||||
VMOV V8.D[1], R5
|
||||
// a zero 64-bit half of the combined mask means some lane differed
CBZ R4, not_equal
|
||||
CBZ R5, not_equal
|
||||
BNE chunk64_loop
|
||||
// R2 = bytes left after the 64-byte chunks
AND $0x3f, R2, R2
|
||||
CBZ R2, equal
|
||||
chunk16:
|
||||
// work with 16-byte chunks
|
||||
BIC $0xf, R2, R3
|
||||
CBZ R3, tail
|
||||
ADD R3, R0, R6 // end of chunks
|
||||
chunk16_loop:
|
||||
LDP.P 16(R0), (R4, R5)
|
||||
LDP.P 16(R1), (R7, R9)
|
||||
// XOR is non-zero exactly when the two words differ
EOR R4, R7
|
||||
CBNZ R7, not_equal
|
||||
EOR R5, R9
|
||||
CBNZ R9, not_equal
|
||||
CMP R0, R6
|
||||
BNE chunk16_loop
|
||||
AND $0xf, R2, R2
|
||||
CBZ R2, equal
|
||||
tail:
|
||||
// special compare of tail with length < 16
|
||||
// bit 3 clear => fewer than 8 bytes remain
TBZ $3, R2, lt_8
|
||||
MOVD (R0), R4
|
||||
MOVD (R1), R5
|
||||
EOR R4, R5
|
||||
CBNZ R5, not_equal
|
||||
SUB $8, R2, R6 // offset of the last 8 bytes
|
||||
// second load may overlap the first; together they cover all 8..15 bytes
MOVD (R0)(R6), R4
|
||||
MOVD (R1)(R6), R5
|
||||
EOR R4, R5
|
||||
CBNZ R5, not_equal
|
||||
B equal
|
||||
lt_8:
|
||||
TBZ $2, R2, lt_4
|
||||
MOVWU (R0), R4
|
||||
MOVWU (R1), R5
|
||||
EOR R4, R5
|
||||
CBNZ R5, not_equal
|
||||
SUB $4, R2, R6 // offset of the last 4 bytes
|
||||
MOVWU (R0)(R6), R4
|
||||
MOVWU (R1)(R6), R5
|
||||
EOR R4, R5
|
||||
CBNZ R5, not_equal
|
||||
B equal
|
||||
lt_4:
|
||||
TBZ $1, R2, lt_2
|
||||
// post-increment so that a trailing odd byte is read from the right place
MOVHU.P 2(R0), R4
|
||||
MOVHU.P 2(R1), R5
|
||||
CMP R4, R5
|
||||
BNE not_equal
|
||||
lt_2:
|
||||
TBZ $0, R2, equal
|
||||
one:
|
||||
MOVBU (R0), R4
|
||||
MOVBU (R1), R5
|
||||
CMP R4, R5
|
||||
BNE not_equal
|
||||
equal:
|
||||
MOVD $1, R0
|
||||
RET
|
||||
not_equal:
|
||||
// return false: zero register copied into the result register
MOVB ZR, R0
|
||||
RET
|
||||
18
src/internal/bytealg/equal_generic.go
Normal file
18
src/internal/bytealg/equal_generic.go
Normal file
@@ -0,0 +1,18 @@
|
||||
// Copyright 2019 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package bytealg
|
||||
|
||||
// Equal reports whether a and b
|
||||
// are the same length and contain the same bytes.
|
||||
// A nil argument is equivalent to an empty slice.
|
||||
//
|
||||
// Equal is equivalent to bytes.Equal.
|
||||
// It is provided here for convenience,
|
||||
// because some packages cannot depend on bytes.
|
||||
func Equal(a, b []byte) bool {
|
||||
// Neither cmd/compile nor gccgo allocates for these string conversions.
|
||||
// There is a test for this in package bytes.
// String equality checks both length and content, and a nil slice
// converts to "" just like an empty one, which yields the documented
// contract. Keep the comparison in exactly this form: the no-allocation
// guarantee above is tied to converting directly inside the comparison.
|
||||
return string(a) == string(b)
|
||||
}
|
||||
44
src/internal/bytealg/equal_loong64.s
Normal file
44
src/internal/bytealg/equal_loong64.s
Normal file
@@ -0,0 +1,44 @@
|
||||
// Copyright 2022 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include "go_asm.h"
|
||||
#include "textflag.h"
|
||||
|
||||
#define REGCTXT R29
|
||||
|
||||
// memequal(a, b unsafe.Pointer, size uintptr) bool
//
// Register-based entry point: a in R4, b in R5, size in R6; the result
// is returned in R4. Identical pointers return true immediately.
// Otherwise the buffers are compared one byte at a time until either a
// mismatch is found or the cursor in R4 reaches the end address in R7.
// A size of zero makes R7 == R4, so the loop exits with true at once.
|
||||
TEXT runtime·memequal<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-25
|
||||
BEQ R4, R5, eq
|
||||
// R7 = a + size: one past the last byte of a
ADDV R4, R6, R7
|
||||
PCALIGN $16
|
||||
loop:
|
||||
BNE R4, R7, test
|
||||
// cursor reached the end without a mismatch
MOVV $1, R4
|
||||
RET
|
||||
test:
|
||||
MOVBU (R4), R9
|
||||
ADDV $1, R4
|
||||
MOVBU (R5), R10
|
||||
ADDV $1, R5
|
||||
BEQ R9, R10, loop
|
||||
|
||||
// mismatch: return false (R0 is used as the zero source here)
MOVB R0, R4
|
||||
RET
|
||||
eq:
|
||||
MOVV $1, R4
|
||||
RET
|
||||
|
||||
// memequal_varlen(a, b unsafe.Pointer) bool
//
// a is in R4 and b in R5; the size is read from offset 8 of the closure
// addressed by REGCTXT (R29). Identical pointers return true directly.
// Otherwise the three values are spilled into this function's 40-byte
// frame at 8/16/24(R3), runtime·memequal is called through its
// stack-based symbol, and the result byte is read back from 32(R3)
// into R4.
|
||||
TEXT runtime·memequal_varlen<ABIInternal>(SB),NOSPLIT,$40-17
|
||||
BEQ R4, R5, eq
|
||||
MOVV 8(REGCTXT), R6 // compiler stores size at offset 8 in the closure
|
||||
// lay out (a, b, size) as stack arguments for the call below
MOVV R4, 8(R3)
|
||||
MOVV R5, 16(R3)
|
||||
MOVV R6, 24(R3)
|
||||
JAL runtime·memequal(SB)
|
||||
// result slot follows the three 8-byte arguments
MOVBU 32(R3), R4
|
||||
RET
|
||||
eq:
|
||||
MOVV $1, R4
|
||||
RET
|
||||
118
src/internal/bytealg/equal_mips64x.s
Normal file
118
src/internal/bytealg/equal_mips64x.s
Normal file
@@ -0,0 +1,118 @@
|
||||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build mips64 || mips64le
|
||||
|
||||
#include "go_asm.h"
|
||||
#include "textflag.h"
|
||||
|
||||
#define REGCTXT R22
|
||||
|
||||
// memequal(a, b unsafe.Pointer, size uintptr) bool
//
// Stack-based: a+0, b+8, size+16, result byte at ret+24.
// Identical pointers return true immediately. For sizes below 16, or
// when either pointer is not 8-byte aligned, the buffers are compared
// one byte at a time. Otherwise they are compared in 16-byte chunks
// (two 8-byte loads per side), and the remaining 0..15 bytes are handled
// by testing bits 8, 4, 2 and 1 of the remaining count in turn.
//
// Registers: R1/R2 cursors into a/b, R3 remaining size (chunk path),
// R4 end address of a (byte path), R0 is used as the constant zero.
|
||||
TEXT runtime·memequal(SB),NOSPLIT|NOFRAME,$0-25
|
||||
MOVV a+0(FP), R1
|
||||
MOVV b+8(FP), R2
|
||||
BEQ R1, R2, eq
|
||||
MOVV size+16(FP), R3
|
||||
// R4 = a + size: termination address for the byte loop
ADDV R1, R3, R4
|
||||
|
||||
// chunk size is 16
|
||||
// R8 = 1 if size < 16 (unsigned), else 0
SGTU $16, R3, R8
|
||||
BEQ R0, R8, chunk_entry
|
||||
|
||||
byte_loop:
|
||||
BNE R1, R4, byte_test
|
||||
MOVV $1, R1
|
||||
MOVB R1, ret+24(FP)
|
||||
RET
|
||||
byte_test:
|
||||
MOVBU (R1), R6
|
||||
ADDV $1, R1
|
||||
MOVBU (R2), R7
|
||||
ADDV $1, R2
|
||||
BEQ R6, R7, byte_loop
|
||||
JMP not_eq
|
||||
|
||||
chunk_entry:
|
||||
// make sure both a and b are aligned
|
||||
// OR-ing the pointers lets one mask test the low 3 bits of both
OR R1, R2, R9
|
||||
AND $0x7, R9
|
||||
BNE R0, R9, byte_loop
|
||||
// size >= 16 is already known, so skip the size check on the first pass
JMP chunk_loop_1
|
||||
|
||||
chunk_loop:
|
||||
// chunk size is 16
|
||||
SGTU $16, R3, R8
|
||||
BNE R0, R8, chunk_tail_8
|
||||
chunk_loop_1:
|
||||
MOVV (R1), R6
|
||||
MOVV (R2), R7
|
||||
BNE R6, R7, not_eq
|
||||
MOVV 8(R1), R12
|
||||
MOVV 8(R2), R13
|
||||
ADDV $16, R1
|
||||
ADDV $16, R2
|
||||
SUBV $16, R3
|
||||
BEQ R12, R13, chunk_loop
|
||||
JMP not_eq
|
||||
|
||||
chunk_tail_8:
|
||||
// fewer than 16 bytes remain; bit 3 of R3 set => compare 8 more bytes
AND $8, R3, R14
|
||||
BEQ R0, R14, chunk_tail_4
|
||||
MOVV (R1), R6
|
||||
MOVV (R2), R7
|
||||
BNE R6, R7, not_eq
|
||||
ADDV $8, R1
|
||||
ADDV $8, R2
|
||||
|
||||
chunk_tail_4:
|
||||
AND $4, R3, R14
|
||||
BEQ R0, R14, chunk_tail_2
|
||||
MOVWU (R1), R6
|
||||
MOVWU (R2), R7
|
||||
BNE R6, R7, not_eq
|
||||
ADDV $4, R1
|
||||
ADDV $4, R2
|
||||
|
||||
chunk_tail_2:
|
||||
AND $2, R3, R14
|
||||
BEQ R0, R14, chunk_tail_1
|
||||
MOVHU (R1), R6
|
||||
MOVHU (R2), R7
|
||||
BNE R6, R7, not_eq
|
||||
ADDV $2, R1
|
||||
ADDV $2, R2
|
||||
|
||||
chunk_tail_1:
|
||||
AND $1, R3, R14
|
||||
BEQ R0, R14, eq
|
||||
MOVBU (R1), R6
|
||||
MOVBU (R2), R7
|
||||
BEQ R6, R7, eq
|
||||
|
||||
// falls through when the final byte differs
not_eq:
|
||||
MOVB R0, ret+24(FP)
|
||||
RET
|
||||
eq:
|
||||
MOVV $1, R1
|
||||
MOVB R1, ret+24(FP)
|
||||
RET
|
||||
|
||||
// memequal_varlen(a, b unsafe.Pointer) bool
//
// Stack-based: a+0, b+8, result byte at ret+16. The size is read from
// offset 8 of the closure addressed by REGCTXT (R22). Identical pointers
// return true directly; otherwise (a, b, size) are written to
// 8/16/24(R29) in this function's 40-byte frame, runtime·memequal is
// called, and its result byte at 32(R29) is copied to ret.
|
||||
TEXT runtime·memequal_varlen(SB),NOSPLIT,$40-17
|
||||
MOVV a+0(FP), R1
|
||||
MOVV b+8(FP), R2
|
||||
BEQ R1, R2, eq
|
||||
MOVV 8(REGCTXT), R3 // compiler stores size at offset 8 in the closure
|
||||
// outgoing arguments for the memequal call
MOVV R1, 8(R29)
|
||||
MOVV R2, 16(R29)
|
||||
MOVV R3, 24(R29)
|
||||
JAL runtime·memequal(SB)
|
||||
// callee's result slot follows its three 8-byte arguments
MOVBU 32(R29), R1
|
||||
MOVB R1, ret+16(FP)
|
||||
RET
|
||||
eq:
|
||||
MOVV $1, R1
|
||||
MOVB R1, ret+16(FP)
|
||||
RET
|
||||
62
src/internal/bytealg/equal_mipsx.s
Normal file
62
src/internal/bytealg/equal_mipsx.s
Normal file
@@ -0,0 +1,62 @@
|
||||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build mips || mipsle
|
||||
|
||||
#include "go_asm.h"
|
||||
#include "textflag.h"
|
||||
|
||||
#define REGCTXT R22
|
||||
|
||||
// memequal(a, b unsafe.Pointer, size uintptr) bool
//
// Stack-based: a+0, b+4, size+8, result byte at ret+12.
// Identical pointers return true immediately; otherwise the buffers are
// compared one byte at a time until a mismatch is found or the cursor
// R1 reaches the end address R4 = a + size. A zero size therefore
// returns true on the first loop test.
|
||||
TEXT runtime·memequal(SB),NOSPLIT,$0-13
|
||||
MOVW a+0(FP), R1
|
||||
MOVW b+4(FP), R2
|
||||
BEQ R1, R2, eq
|
||||
MOVW size+8(FP), R3
|
||||
ADDU R1, R3, R4
|
||||
loop:
|
||||
BNE R1, R4, test
|
||||
// reached the end with no mismatch
MOVW $1, R1
|
||||
MOVB R1, ret+12(FP)
|
||||
RET
|
||||
test:
|
||||
MOVBU (R1), R6
|
||||
ADDU $1, R1
|
||||
MOVBU (R2), R7
|
||||
ADDU $1, R2
|
||||
BEQ R6, R7, loop
|
||||
|
||||
// mismatch: store false (R0 is used as the zero source)
MOVB R0, ret+12(FP)
|
||||
RET
|
||||
eq:
|
||||
MOVW $1, R1
|
||||
MOVB R1, ret+12(FP)
|
||||
RET
|
||||
|
||||
// memequal_varlen(a, b unsafe.Pointer) bool
//
// Stack-based: a+0, b+4, result byte at ret+8. The size is read from
// offset 4 of the closure addressed by REGCTXT (R22). The comparison
// itself is the same byte-at-a-time loop as memequal, duplicated here
// rather than called.
|
||||
TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-9
|
||||
MOVW a+0(FP), R1
|
||||
MOVW b+4(FP), R2
|
||||
BEQ R1, R2, eq
|
||||
MOVW 4(REGCTXT), R3 // compiler stores size at offset 4 in the closure
|
||||
// R4 = a + size: termination address for the loop
ADDU R1, R3, R4
|
||||
loop:
|
||||
BNE R1, R4, test
|
||||
MOVW $1, R1
|
||||
MOVB R1, ret+8(FP)
|
||||
RET
|
||||
test:
|
||||
MOVBU (R1), R6
|
||||
ADDU $1, R1
|
||||
MOVBU (R2), R7
|
||||
ADDU $1, R2
|
||||
BEQ R6, R7, loop
|
||||
|
||||
// mismatch: store false (R0 is used as the zero source)
MOVB R0, ret+8(FP)
|
||||
RET
|
||||
eq:
|
||||
MOVW $1, R1
|
||||
MOVB R1, ret+8(FP)
|
||||
RET
|
||||
21
src/internal/bytealg/equal_native.go
Normal file
21
src/internal/bytealg/equal_native.go
Normal file
@@ -0,0 +1,21 @@
|
||||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package bytealg
|
||||
|
||||
import "unsafe"
|
||||
|
||||
// The declarations below generate ABI wrappers for functions
|
||||
// implemented in assembly in this package but declared in another
|
||||
// package.
|
||||
|
||||
// The compiler generates calls to runtime.memequal and runtime.memequal_varlen.
|
||||
// In addition, the runtime calls runtime.memequal explicitly.
|
||||
// Those functions are implemented in this package.
|
||||
|
||||
// abigen_runtime_memequal is a body-less Go declaration bound to the
// symbol runtime.memequal by the linkname directive below. It exists
// only to give that assembly routine a Go signature.
//go:linkname abigen_runtime_memequal runtime.memequal
|
||||
func abigen_runtime_memequal(a, b unsafe.Pointer, size uintptr) bool
|
||||
|
||||
// abigen_runtime_memequal_varlen is a body-less Go declaration bound to
// the symbol runtime.memequal_varlen by the linkname directive below.
// It takes no size argument in its signature.
//go:linkname abigen_runtime_memequal_varlen runtime.memequal_varlen
|
||||
func abigen_runtime_memequal_varlen(a, b unsafe.Pointer) bool
|
||||
207
src/internal/bytealg/equal_ppc64x.s
Normal file
207
src/internal/bytealg/equal_ppc64x.s
Normal file
@@ -0,0 +1,207 @@
|
||||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build ppc64 || ppc64le
|
||||
|
||||
#include "go_asm.h"
|
||||
#include "textflag.h"
|
||||
|
||||
// 4K (smallest case) page size offset mask for PPC64.
|
||||
#define PAGE_OFFSET 4095
|
||||
|
||||
// The raw BC opcode is hard to read, and no extended
|
||||
// mnemonics are offered for these forms.
|
||||
#define BGELR_CR6 BC 4, CR6LT, (LR)
|
||||
#define BEQLR BC 12, CR0EQ, (LR)
|
||||
|
||||
// memequal(a, b unsafe.Pointer, size uintptr) bool
//
// Register-based entry point. The arguments are already in the
// registers memeqbody expects, so this is a bare tail-branch; memeqbody
// leaves the result in R3.
|
||||
TEXT runtime·memequal<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-25
|
||||
// R3 = a
|
||||
// R4 = b
|
||||
// R5 = size
|
||||
BR memeqbody<>(SB)
|
||||
|
||||
// memequal_varlen(a, b unsafe.Pointer) bool
//
// Like memequal, but the size is loaded from offset 8 of the closure
// addressed by R11. Identical pointers return true (R3 = 1) without
// reading the size; otherwise control tail-branches to memeqbody.
|
||||
TEXT runtime·memequal_varlen<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-17
|
||||
// R3 = a
|
||||
// R4 = b
|
||||
CMP R3, R4
|
||||
BEQ eq
|
||||
MOVD 8(R11), R5 // compiler stores size at offset 8 in the closure
|
||||
BR memeqbody<>(SB)
|
||||
eq:
|
||||
MOVD $1, R3
|
||||
RET
|
||||
|
||||
// Do an efficient memequal for ppc64
|
||||
// R3 = s1
|
||||
// R4 = s2
|
||||
// R5 = len
|
||||
// On exit:
|
||||
// R3 = return value
//
// The length selects one of four paths:
//   len <= 16  : check0_16 (general-purpose registers, or LXVL on power10)
//   len <= 32  : check17_32 (two 16-byte vector compares that may overlap)
//   len <= 64  : check33_64 (two leading compares, then check17_32 on the
//                last 32 bytes)
//   otherwise  : loop64 over whole 64-byte chunks, then one overlapping
//                compare of the final 64 bytes if len is not a multiple of 64
//
// Working registers: R8 cursor into s1 (R4 doubles as the cursor into
// s2), R9/R10 one-past-the-end of s1/s2, R11 the constant 1 used as the
// "true" operand of ISEL.
|
||||
TEXT memeqbody<>(SB),NOSPLIT|NOFRAME,$0-0
|
||||
MOVD R3, R8 // Move s1 into R8
|
||||
ADD R5, R3, R9 // &s1[len(s1)]
|
||||
ADD R5, R4, R10 // &s2[len(s2)]
|
||||
MOVD $1, R11
|
||||
CMP R5, $16 // Use GPR checks for len <= 16
|
||||
BLE check0_16
|
||||
MOVD $0, R3 // Assume no-match in case BGELR CR6 returns
|
||||
CMP R5, $32 // Use overlapping VSX loads for len <= 32
|
||||
BLE check17_32 // Do a pair of overlapping VSR compares
|
||||
CMP R5, $64
|
||||
BLE check33_64 // Hybrid check + overlap compare.
|
||||
|
||||
setup64:
|
||||
SRD $6, R5, R6 // number of 64 byte chunks to compare
|
||||
MOVD R6, CTR
|
||||
MOVD $16, R14 // index for VSX loads and stores
|
||||
MOVD $32, R15
|
||||
MOVD $48, R16
|
||||
// The CR0 result of this ANDCC is consumed after the loop by ISEL/BEQLR.
ANDCC $0x3F, R5, R5 // len%64==0?
|
||||
|
||||
PCALIGN $16
|
||||
loop64:
|
||||
LXVD2X (R8+R0), V0
|
||||
LXVD2X (R4+R0), V1
|
||||
VCMPEQUBCC V0, V1, V2 // compare, setting CR6
|
||||
// early return with R3 == 0 unless all 16 bytes matched
BGELR_CR6
|
||||
LXVD2X (R8+R14), V0
|
||||
LXVD2X (R4+R14), V1
|
||||
VCMPEQUBCC V0, V1, V2
|
||||
BGELR_CR6
|
||||
LXVD2X (R8+R15), V0
|
||||
LXVD2X (R4+R15), V1
|
||||
VCMPEQUBCC V0, V1, V2
|
||||
BGELR_CR6
|
||||
LXVD2X (R8+R16), V0
|
||||
LXVD2X (R4+R16), V1
|
||||
VCMPEQUBCC V0, V1, V2
|
||||
BGELR_CR6
|
||||
ADD $64,R8 // bump up to next 64
|
||||
ADD $64,R4
|
||||
BDNZ loop64
|
||||
|
||||
ISEL CR0EQ, R11, R3, R3 // If no tail, return 1, otherwise R3 remains 0.
|
||||
BEQLR // return if no tail.
|
||||
|
||||
// Tail: re-compare the last 64 bytes of both buffers. This overlaps
// bytes the loop already verified, which is harmless.
ADD $-64, R9, R8
|
||||
ADD $-64, R10, R4
|
||||
LXVD2X (R8+R0), V0
|
||||
LXVD2X (R4+R0), V1
|
||||
VCMPEQUBCC V0, V1, V2
|
||||
BGELR_CR6
|
||||
LXVD2X (R8+R14), V0
|
||||
LXVD2X (R4+R14), V1
|
||||
VCMPEQUBCC V0, V1, V2
|
||||
BGELR_CR6
|
||||
LXVD2X (R8+R15), V0
|
||||
LXVD2X (R4+R15), V1
|
||||
VCMPEQUBCC V0, V1, V2
|
||||
BGELR_CR6
|
||||
LXVD2X (R8+R16), V0
|
||||
LXVD2X (R4+R16), V1
|
||||
VCMPEQUBCC V0, V1, V2
|
||||
// R3 = 1 if the last compare matched, else 0 (R0 is the zero source)
ISEL CR6LT, R11, R0, R3
|
||||
RET
|
||||
|
||||
check33_64:
|
||||
// Bytes 0-15
|
||||
LXVD2X (R8+R0), V0
|
||||
LXVD2X (R4+R0), V1
|
||||
VCMPEQUBCC V0, V1, V2
|
||||
BGELR_CR6
|
||||
ADD $16, R8
|
||||
ADD $16, R4
|
||||
|
||||
// Bytes 16-31
|
||||
LXVD2X (R8+R0), V0
|
||||
LXVD2X (R4+R0), V1
|
||||
VCMPEQUBCC V0, V1, V2
|
||||
BGELR_CR6
|
||||
|
||||
// A little tricky, but point R4,R8 to &sx[len-32],
|
||||
// and reuse check17_32 to check the next 1-31 bytes (with some overlap)
|
||||
ADD $-32, R9, R8
|
||||
ADD $-32, R10, R4
|
||||
// Fallthrough
|
||||
|
||||
check17_32:
|
||||
LXVD2X (R8+R0), V0
|
||||
LXVD2X (R4+R0), V1
|
||||
VCMPEQUBCC V0, V1, V2
|
||||
// R5 = 1 if the first 16 bytes matched, else 0; combined with the
// second compare by the final ISEL below
ISEL CR6LT, R11, R0, R5
|
||||
|
||||
// Load sX[len(sX)-16:len(sX)] and compare.
|
||||
ADD $-16, R9
|
||||
ADD $-16, R10
|
||||
LXVD2X (R9+R0), V0
|
||||
LXVD2X (R10+R0), V1
|
||||
VCMPEQUBCC V0, V1, V2
|
||||
ISEL CR6LT, R5, R0, R3
|
||||
RET
|
||||
|
||||
check0_16:
|
||||
#ifdef GOPPC64_power10
|
||||
// power10: length-controlled vector loads; the byte count is passed
// in the top byte of R7 (len << 56)
SLD $56, R5, R7
|
||||
LXVL R8, R7, V0
|
||||
LXVL R4, R7, V1
|
||||
VCMPEQUDCC V0, V1, V2
|
||||
ISEL CR6LT, R11, R0, R3
|
||||
RET
|
||||
#else
|
||||
CMP R5, $8
|
||||
BLT check0_7
|
||||
// Load sX[0:7] and compare.
|
||||
MOVD (R8), R6
|
||||
MOVD (R4), R7
|
||||
CMP R6, R7
|
||||
ISEL CR0EQ, R11, R0, R5
|
||||
// Load sX[len(sX)-8:len(sX)] and compare.
|
||||
MOVD -8(R9), R6
|
||||
MOVD -8(R10), R7
|
||||
CMP R6, R7
|
||||
ISEL CR0EQ, R5, R0, R3
|
||||
RET
|
||||
|
||||
check0_7:
|
||||
CMP R5,$0
|
||||
MOVD $1, R3
|
||||
BEQLR // return if len == 0
|
||||
|
||||
// Check < 8B loads with a single compare, but select the load address
|
||||
// such that it cannot cross a page boundary. Load a few bytes from the
|
||||
// lower address if that does not cross the lower page. Or, load a few
|
||||
// extra bytes from the higher addresses. And align those values
|
||||
// consistently in register as either address may have differing
|
||||
// alignment requirements.
|
||||
ANDCC $PAGE_OFFSET, R8, R6 // &sX & PAGE_OFFSET
|
||||
ANDCC $PAGE_OFFSET, R4, R9
|
||||
SUBC R5, $8, R12 // 8-len
|
||||
SLD $3, R12, R14 // (8-len)*8
|
||||
CMPU R6, R12, CR1 // Enough bytes lower in the page to load lower?
|
||||
CMPU R9, R12, CR0
|
||||
SUB R12, R8, R6 // compute lower load address
|
||||
SUB R12, R4, R9
|
||||
ISEL CR1LT, R8, R6, R8 // R8 = R6 < 0 ? R8 (&s1) : R6 (&s1 - (8-len))
|
||||
ISEL CR0LT, R4, R9, R4 // Similar for s2
|
||||
MOVD (R8), R15
|
||||
MOVD (R4), R16
|
||||
SLD R14, R15, R7
|
||||
SLD R14, R16, R17
|
||||
SRD R14, R7, R7 // Clear the upper (8-len) bytes (with 2 shifts)
|
||||
SRD R14, R17, R17
|
||||
SRD R14, R15, R6 // Clear the lower (8-len) bytes
|
||||
SRD R14, R16, R9
|
||||
// Which of the two masked forms holds the wanted bytes depends on
// endianness, hence the swapped ISEL operands below.
#ifdef GOARCH_ppc64le
|
||||
ISEL CR1LT, R7, R6, R8 // Choose the correct len bytes to compare based on alignment
|
||||
ISEL CR0LT, R17, R9, R4
|
||||
#else
|
||||
ISEL CR1LT, R6, R7, R8
|
||||
ISEL CR0LT, R9, R17, R4
|
||||
#endif
|
||||
CMP R4, R8
|
||||
ISEL CR0EQ, R11, R0, R3
|
||||
RET
|
||||
#endif // tail processing if !defined(GOPPC64_power10)
|
||||
126
src/internal/bytealg/equal_riscv64.s
Normal file
126
src/internal/bytealg/equal_riscv64.s
Normal file
@@ -0,0 +1,126 @@
|
||||
// Copyright 2019 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include "go_asm.h"
|
||||
#include "textflag.h"
|
||||
|
||||
#define CTXT S10
|
||||
|
||||
// func memequal(a, b unsafe.Pointer, size uintptr) bool
//
// Register-based entry point. The arguments already sit in the registers
// the shared body expects, so this is a bare tail-jump to memequal<>,
// which returns the result in X10.
|
||||
TEXT runtime·memequal<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-25
|
||||
// X10 = a_base
|
||||
// X11 = b_base
|
||||
// X12 = size
|
||||
JMP memequal<>(SB)
|
||||
|
||||
// func memequal_varlen(a, b unsafe.Pointer) bool
//
// Loads the size from offset 8 of the closure addressed by CTXT (S10)
// into X12, then tail-jumps to the shared body memequal<>.
|
||||
TEXT runtime·memequal_varlen<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-17
|
||||
MOV 8(CTXT), X12 // compiler stores size at offset 8 in the closure
|
||||
// X10 = a_base
|
||||
// X11 = b_base
|
||||
JMP memequal<>(SB)
|
||||
|
||||
// memequal<> is the shared comparison body for the two entry points above.
//
// On entry X10 and X11 contain pointers, X12 contains length.
|
||||
// For non-regabi X13 contains address for return value.
// (NOTE(review): X13 is not referenced anywhere in this body; the line
// above looks like a leftover from an earlier calling convention.)
|
||||
// For regabi return value in X10.
//
// Strategy: identical pointers return true at once. Lengths below 32,
// or pointers whose low three bits differ, go straight to the byte
// loads of loop4/loop1. Otherwise single bytes are compared until both
// pointers are 8-byte aligned, then 32-byte and 16-byte loops of 8-byte
// loads run, and loop4/loop1 finish whatever remains.
|
||||
TEXT memequal<>(SB),NOSPLIT|NOFRAME,$0
|
||||
BEQ X10, X11, eq
|
||||
|
||||
MOV $32, X23
|
||||
BLT X12, X23, loop4_check
|
||||
|
||||
// Check alignment - if alignment differs we have to do one byte at a time.
|
||||
AND $7, X10, X9
|
||||
AND $7, X11, X19
|
||||
BNE X9, X19, loop4_check
|
||||
BEQZ X9, loop32_check
|
||||
|
||||
// Check one byte at a time until we reach 8 byte alignment.
|
||||
// X9 = 8 - (a & 7): bytes needed to reach alignment; taken off the length
SUB X9, X0, X9
|
||||
ADD $8, X9, X9
|
||||
SUB X9, X12, X12
|
||||
align:
|
||||
SUB $1, X9
|
||||
MOVBU 0(X10), X19
|
||||
MOVBU 0(X11), X20
|
||||
BNE X19, X20, not_eq
|
||||
ADD $1, X10
|
||||
ADD $1, X11
|
||||
BNEZ X9, align
|
||||
|
||||
loop32_check:
|
||||
MOV $32, X9
|
||||
BLT X12, X9, loop16_check
|
||||
loop32:
|
||||
MOV 0(X10), X19
|
||||
MOV 0(X11), X20
|
||||
MOV 8(X10), X21
|
||||
MOV 8(X11), X22
|
||||
BNE X19, X20, not_eq
|
||||
BNE X21, X22, not_eq
|
||||
MOV 16(X10), X14
|
||||
MOV 16(X11), X15
|
||||
MOV 24(X10), X16
|
||||
MOV 24(X11), X17
|
||||
BNE X14, X15, not_eq
|
||||
BNE X16, X17, not_eq
|
||||
ADD $32, X10
|
||||
ADD $32, X11
|
||||
SUB $32, X12
|
||||
BGE X12, X9, loop32
|
||||
BEQZ X12, eq
|
||||
|
||||
loop16_check:
|
||||
MOV $16, X23
|
||||
BLT X12, X23, loop4_check
|
||||
loop16:
|
||||
MOV 0(X10), X19
|
||||
MOV 0(X11), X20
|
||||
MOV 8(X10), X21
|
||||
MOV 8(X11), X22
|
||||
BNE X19, X20, not_eq
|
||||
BNE X21, X22, not_eq
|
||||
ADD $16, X10
|
||||
ADD $16, X11
|
||||
SUB $16, X12
|
||||
BGE X12, X23, loop16
|
||||
BEQZ X12, eq
|
||||
|
||||
loop4_check:
|
||||
MOV $4, X23
|
||||
BLT X12, X23, loop1
|
||||
loop4:
|
||||
// four single-byte loads per side, so no alignment is required here
MOVBU 0(X10), X19
|
||||
MOVBU 0(X11), X20
|
||||
MOVBU 1(X10), X21
|
||||
MOVBU 1(X11), X22
|
||||
BNE X19, X20, not_eq
|
||||
BNE X21, X22, not_eq
|
||||
MOVBU 2(X10), X14
|
||||
MOVBU 2(X11), X15
|
||||
MOVBU 3(X10), X16
|
||||
MOVBU 3(X11), X17
|
||||
BNE X14, X15, not_eq
|
||||
BNE X16, X17, not_eq
|
||||
ADD $4, X10
|
||||
ADD $4, X11
|
||||
SUB $4, X12
|
||||
BGE X12, X23, loop4
|
||||
|
||||
loop1:
|
||||
BEQZ X12, eq
|
||||
MOVBU 0(X10), X19
|
||||
MOVBU 0(X11), X20
|
||||
BNE X19, X20, not_eq
|
||||
ADD $1, X10
|
||||
ADD $1, X11
|
||||
SUB $1, X12
|
||||
JMP loop1
|
||||
|
||||
not_eq:
|
||||
MOVB ZERO, X10
|
||||
RET
|
||||
eq:
|
||||
MOV $1, X10
|
||||
RET
|
||||
92
src/internal/bytealg/equal_s390x.s
Normal file
92
src/internal/bytealg/equal_s390x.s
Normal file
@@ -0,0 +1,92 @@
|
||||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include "go_asm.h"
|
||||
#include "textflag.h"
|
||||
|
||||
// memequal(a, b unsafe.Pointer, size uintptr) bool
//
// Stack-based: a+0, b+8, size+16, result byte at ret+24.
// Loads the arguments into the registers memeqbody expects (R3=a, R5=b,
// R6=len, R7=address of the result byte) and tail-branches to it.
|
||||
TEXT runtime·memequal(SB),NOSPLIT|NOFRAME,$0-25
|
||||
MOVD a+0(FP), R3
|
||||
MOVD b+8(FP), R5
|
||||
MOVD size+16(FP), R6
|
||||
// LA takes the address of the result slot rather than loading from it
LA ret+24(FP), R7
|
||||
BR memeqbody<>(SB)
|
||||
|
||||
// memequal_varlen(a, b unsafe.Pointer) bool
//
// Stack-based: a+0, b+8, result byte at ret+16. The size is read from
// offset 8 of the closure addressed by R12. Sets up R3/R5/R6/R7 for
// memeqbody and tail-branches to it.
|
||||
TEXT runtime·memequal_varlen(SB),NOSPLIT|NOFRAME,$0-17
|
||||
MOVD a+0(FP), R3
|
||||
MOVD b+8(FP), R5
|
||||
MOVD 8(R12), R6 // compiler stores size at offset 8 in the closure
|
||||
LA ret+16(FP), R7
|
||||
BR memeqbody<>(SB)
|
||||
|
||||
// memeqbody compares R6 bytes at R3 with R6 bytes at R5 and stores 1
// (equal) or 0 (different) into the byte at R7.
//
// Strategy: identical pointers or a zero length answer true. While at
// least 256 bytes remain they are compared with one CLC per 256 bytes.
// A remainder of 32..255 bytes is compared by executing the CLC in
// memeqbodyclc through EXRL with R8 = length-1. Fewer than 32 bytes
// are compared with 8-, 4- and 1-byte loads (label tiny), where R2 is
// the running offset into both buffers.
//
// input:
|
||||
// R3 = a
|
||||
// R5 = b
|
||||
// R6 = len
|
||||
// R7 = address of output byte (stores 0 or 1 here)
|
||||
// a and b have the same length
|
||||
TEXT memeqbody<>(SB),NOSPLIT|NOFRAME,$0-0
|
||||
CMPBEQ R3, R5, equal
|
||||
loop:
|
||||
CMPBEQ R6, $0, equal
|
||||
CMPBLT R6, $32, tiny
|
||||
CMP R6, $256
|
||||
BLT tail
|
||||
CLC $256, 0(R3), 0(R5)
|
||||
BNE notequal
|
||||
SUB $256, R6
|
||||
LA 256(R3), R3
|
||||
LA 256(R5), R5
|
||||
BR loop
|
||||
tail:
|
||||
// R8 = len-1 is passed to EXRL together with the 1-byte CLC template
SUB $1, R6, R8
|
||||
EXRL $memeqbodyclc<>(SB), R8
|
||||
BEQ equal
|
||||
notequal:
|
||||
MOVB $0, 0(R7)
|
||||
RET
|
||||
equal:
|
||||
MOVB $1, 0(R7)
|
||||
RET
|
||||
tiny:
|
||||
// R2 = offset of the next uncompared byte
MOVD $0, R2
|
||||
CMPBLT R6, $16, lt16
|
||||
MOVD 0(R3), R8
|
||||
MOVD 0(R5), R9
|
||||
CMPBNE R8, R9, notequal
|
||||
MOVD 8(R3), R8
|
||||
MOVD 8(R5), R9
|
||||
CMPBNE R8, R9, notequal
|
||||
LA 16(R2), R2
|
||||
SUB $16, R6
|
||||
lt16:
|
||||
CMPBLT R6, $8, lt8
|
||||
MOVD 0(R3)(R2*1), R8
|
||||
MOVD 0(R5)(R2*1), R9
|
||||
CMPBNE R8, R9, notequal
|
||||
LA 8(R2), R2
|
||||
SUB $8, R6
|
||||
lt8:
|
||||
CMPBLT R6, $4, lt4
|
||||
MOVWZ 0(R3)(R2*1), R8
|
||||
MOVWZ 0(R5)(R2*1), R9
|
||||
CMPBNE R8, R9, notequal
|
||||
LA 4(R2), R2
|
||||
SUB $4, R6
|
||||
lt4:
|
||||
// At most 3 bytes remain. CHECK(n) finishes with "equal" when exactly n
// bytes were left, otherwise compares byte n and continues.
#define CHECK(n) \
|
||||
CMPBEQ R6, $n, equal \
|
||||
MOVB n(R3)(R2*1), R8 \
|
||||
MOVB n(R5)(R2*1), R9 \
|
||||
CMPBNE R8, R9, notequal
|
||||
CHECK(0)
|
||||
CHECK(1)
|
||||
CHECK(2)
|
||||
CHECK(3)
|
||||
BR equal
|
||||
|
||||
// memeqbodyclc holds the CLC instruction that memeqbody's tail path
// executes via EXRL with R8 = length-1, so the comparison length is
// supplied at run time. Within this file it is only referenced by that
// EXRL; nothing branches to it.
TEXT memeqbodyclc<>(SB),NOSPLIT|NOFRAME,$0-0
|
||||
CLC $1, 0(R3), 0(R5)
|
||||
RET
|
||||
77
src/internal/bytealg/equal_wasm.s
Normal file
77
src/internal/bytealg/equal_wasm.s
Normal file
@@ -0,0 +1,77 @@
|
||||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include "go_asm.h"
|
||||
#include "textflag.h"
|
||||
|
||||
// memequal(p, q unsafe.Pointer, size uintptr) bool
//
// Stack-based: a+0, b+8, size+16, result byte at ret+24.
// Pushes the three arguments, calls memeqbody, and stores the 0/1 it
// returns into ret.
|
||||
TEXT runtime·memequal(SB), NOSPLIT, $0-25
|
||||
// pushed first so it lies beneath memeqbody's result; presumably the
// address operand for the I64Store8 below - TODO confirm
Get SP
|
||||
I64Load a+0(FP)
|
||||
I64Load b+8(FP)
|
||||
I64Load size+16(FP)
|
||||
Call memeqbody<>(SB)
|
||||
I64Store8 ret+24(FP)
|
||||
RET
|
||||
|
||||
// memequal_varlen(a, b unsafe.Pointer) bool
//
// Stack-based: a+0, b+8, result byte at ret+16. The size is loaded from
// offset 8 of the closure addressed by CTXT. Otherwise identical to
// memequal: push the operands, call memeqbody, store its 0/1 result.
|
||||
TEXT runtime·memequal_varlen(SB), NOSPLIT, $0-17
|
||||
Get SP
|
||||
I64Load a+0(FP)
|
||||
I64Load b+8(FP)
|
||||
I64Load 8(CTXT) // compiler stores size at offset 8 in the closure
|
||||
Call memeqbody<>(SB)
|
||||
I64Store8 ret+16(FP)
|
||||
RET
|
||||
|
||||
// memeqbody compares len bytes at a with len bytes at b.
// Inside the body the parameters are R0 (a), R1 (b) and R2 (len).
// Identical pointers return 1 at once; otherwise one byte is compared
// per loop iteration, returning 0 on the first mismatch and 1 when the
// remaining length reaches zero.
//
// params: a, b, len
|
||||
// ret: 0/1
|
||||
TEXT memeqbody<>(SB), NOSPLIT, $0-0
|
||||
Get R0
|
||||
Get R1
|
||||
I64Eq
|
||||
If
|
||||
I64Const $1
|
||||
Return
|
||||
End
|
||||
|
||||
loop:
|
||||
Loop
|
||||
// nothing left to compare => equal
Get R2
|
||||
I64Eqz
|
||||
If
|
||||
I64Const $1
|
||||
Return
|
||||
End
|
||||
|
||||
// load one byte from each side; the 64-bit pointers are wrapped to
// 32 bits before being used as load addresses
Get R0
|
||||
I32WrapI64
|
||||
I64Load8U $0
|
||||
Get R1
|
||||
I32WrapI64
|
||||
I64Load8U $0
|
||||
I64Ne
|
||||
If
|
||||
I64Const $0
|
||||
Return
|
||||
End
|
||||
|
||||
// a++
Get R0
|
||||
I64Const $1
|
||||
I64Add
|
||||
Set R0
|
||||
|
||||
// b++
Get R1
|
||||
I64Const $1
|
||||
I64Add
|
||||
Set R1
|
||||
|
||||
// len--
Get R2
|
||||
I64Const $1
|
||||
I64Sub
|
||||
Set R2
|
||||
|
||||
Br loop
|
||||
End
|
||||
// not reached: every path through the loop either returns or branches
// back to its start
UNDEF
|
||||
26
src/internal/bytealg/index_amd64.go
Normal file
26
src/internal/bytealg/index_amd64.go
Normal file
@@ -0,0 +1,26 @@
|
||||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package bytealg
|
||||
|
||||
import "internal/cpu"
|
||||
|
||||
const MaxBruteForce = 64
|
||||
|
||||
func init() {
|
||||
if cpu.X86.HasAVX2 {
|
||||
MaxLen = 63
|
||||
} else {
|
||||
MaxLen = 31
|
||||
}
|
||||
}
|
||||
|
||||
// Cutover reports how many IndexByte failures should be tolerated
// before switching over to Index, given that n bytes have been
// processed so far.
// See the bytes.Index implementation for details.
func Cutover(n int) int {
	const (
		// initialSlop is the head start, in bytes, granted before any
		// input has been processed.
		initialSlop = 16
		// bytesPerFailure is the number of processed bytes that buys
		// one more tolerated failure.
		bytesPerFailure = 8
	)
	return (n + initialSlop) / bytesPerFailure
}
|
||||
278
src/internal/bytealg/index_amd64.s
Normal file
278
src/internal/bytealg/index_amd64.s
Normal file
@@ -0,0 +1,278 @@
|
||||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include "go_asm.h"
|
||||
#include "textflag.h"
|
||||
|
||||
// Index is the stack-based entry point for two slice arguments: a at
// offsets 0..23, b at offsets 24..47, result word at ret+48. It loads
// the registers indexbody expects and tail-jumps to it:
//   DI = base of a (the data searched in), DX = len(a),
//   R8 = base of b (the data searched for), AX = len(b),
//   R11 = address where the result is to be stored.
// R10 keeps a second copy of the base of a - presumably so indexbody
// can turn a match address back into an offset; TODO confirm against
// indexbody's success path.
TEXT ·Index(SB),NOSPLIT,$0-56
|
||||
MOVQ a_base+0(FP), DI
|
||||
MOVQ a_len+8(FP), DX
|
||||
MOVQ b_base+24(FP), R8
|
||||
MOVQ b_len+32(FP), AX
|
||||
MOVQ DI, R10
|
||||
LEAQ ret+48(FP), R11
|
||||
JMP indexbody<>(SB)
|
||||
|
||||
// IndexString is the stack-based entry point for two string arguments:
// a at offsets 0..15, b at offsets 16..31, result word at ret+32.
// Apart from the argument offsets it is identical to Index: it loads
// DI/DX with the base and length of a, R8/AX with the base and length
// of b, R10 with a copy of the base of a, R11 with the address of the
// result, and tail-jumps to indexbody.
TEXT ·IndexString(SB),NOSPLIT,$0-40
|
||||
MOVQ a_base+0(FP), DI
|
||||
MOVQ a_len+8(FP), DX
|
||||
MOVQ b_base+16(FP), R8
|
||||
MOVQ b_len+24(FP), AX
|
||||
MOVQ DI, R10
|
||||
LEAQ ret+32(FP), R11
|
||||
JMP indexbody<>(SB)
|
||||
|
||||
// AX: length of string, that we are searching for
|
||||
// DX: length of string, in which we are searching
|
||||
// DI: pointer to string, in which we are searching
|
||||
// R8: pointer to string, that we are searching for
|
||||
// R11: address, where to put return value
|
||||
// Note: We want len in DX and AX, because PCMPESTRI implicitly consumes them
|
||||
TEXT indexbody<>(SB),NOSPLIT,$0
|
||||
CMPQ AX, DX
|
||||
JA fail
|
||||
CMPQ DX, $16
|
||||
JAE sse42
|
||||
no_sse42:
|
||||
CMPQ AX, $2
|
||||
JA _3_or_more
|
||||
MOVW (R8), R8
|
||||
LEAQ -1(DI)(DX*1), DX
|
||||
PCALIGN $16
|
||||
loop2:
|
||||
MOVW (DI), SI
|
||||
CMPW SI,R8
|
||||
JZ success
|
||||
ADDQ $1,DI
|
||||
CMPQ DI,DX
|
||||
JB loop2
|
||||
JMP fail
|
||||
_3_or_more:
|
||||
CMPQ AX, $3
|
||||
JA _4_or_more
|
||||
MOVW 1(R8), BX
|
||||
MOVW (R8), R8
|
||||
LEAQ -2(DI)(DX*1), DX
|
||||
loop3:
|
||||
MOVW (DI), SI
|
||||
CMPW SI,R8
|
||||
JZ partial_success3
|
||||
ADDQ $1,DI
|
||||
CMPQ DI,DX
|
||||
JB loop3
|
||||
JMP fail
|
||||
partial_success3:
|
||||
MOVW 1(DI), SI
|
||||
CMPW SI,BX
|
||||
JZ success
|
||||
ADDQ $1,DI
|
||||
CMPQ DI,DX
|
||||
JB loop3
|
||||
JMP fail
|
||||
_4_or_more:
|
||||
CMPQ AX, $4
|
||||
JA _5_or_more
|
||||
MOVL (R8), R8
|
||||
LEAQ -3(DI)(DX*1), DX
|
||||
loop4:
|
||||
MOVL (DI), SI
|
||||
CMPL SI,R8
|
||||
JZ success
|
||||
ADDQ $1,DI
|
||||
CMPQ DI,DX
|
||||
JB loop4
|
||||
JMP fail
|
||||
_5_or_more:
|
||||
CMPQ AX, $7
|
||||
JA _8_or_more
|
||||
LEAQ 1(DI)(DX*1), DX
|
||||
SUBQ AX, DX
|
||||
MOVL -4(R8)(AX*1), BX
|
||||
MOVL (R8), R8
|
||||
loop5to7:
|
||||
MOVL (DI), SI
|
||||
CMPL SI,R8
|
||||
JZ partial_success5to7
|
||||
ADDQ $1,DI
|
||||
CMPQ DI,DX
|
||||
JB loop5to7
|
||||
JMP fail
|
||||
partial_success5to7:
|
||||
MOVL -4(AX)(DI*1), SI
|
||||
CMPL SI,BX
|
||||
JZ success
|
||||
ADDQ $1,DI
|
||||
CMPQ DI,DX
|
||||
JB loop5to7
|
||||
JMP fail
|
||||
_8_or_more:
|
||||
CMPQ AX, $8
|
||||
JA _9_or_more
|
||||
MOVQ (R8), R8
|
||||
LEAQ -7(DI)(DX*1), DX
|
||||
loop8:
|
||||
MOVQ (DI), SI
|
||||
CMPQ SI,R8
|
||||
JZ success
|
||||
ADDQ $1,DI
|
||||
CMPQ DI,DX
|
||||
JB loop8
|
||||
JMP fail
|
||||
_9_or_more:
|
||||
CMPQ AX, $15
|
||||
JA _16_or_more
|
||||
LEAQ 1(DI)(DX*1), DX
|
||||
SUBQ AX, DX
|
||||
MOVQ -8(R8)(AX*1), BX
|
||||
MOVQ (R8), R8
|
||||
loop9to15:
|
||||
MOVQ (DI), SI
|
||||
CMPQ SI,R8
|
||||
JZ partial_success9to15
|
||||
ADDQ $1,DI
|
||||
CMPQ DI,DX
|
||||
JB loop9to15
|
||||
JMP fail
|
||||
partial_success9to15:
|
||||
MOVQ -8(AX)(DI*1), SI
|
||||
CMPQ SI,BX
|
||||
JZ success
|
||||
ADDQ $1,DI
|
||||
CMPQ DI,DX
|
||||
JB loop9to15
|
||||
JMP fail
|
||||
_16_or_more:
|
||||
CMPQ AX, $16
|
||||
JA _17_or_more
|
||||
MOVOU (R8), X1
|
||||
LEAQ -15(DI)(DX*1), DX
|
||||
loop16:
|
||||
MOVOU (DI), X2
|
||||
PCMPEQB X1, X2
|
||||
PMOVMSKB X2, SI
|
||||
CMPQ SI, $0xffff
|
||||
JE success
|
||||
ADDQ $1,DI
|
||||
CMPQ DI,DX
|
||||
JB loop16
|
||||
JMP fail
|
||||
_17_or_more:
|
||||
CMPQ AX, $31
|
||||
JA _32_or_more
|
||||
LEAQ 1(DI)(DX*1), DX
|
||||
SUBQ AX, DX
|
||||
MOVOU -16(R8)(AX*1), X0
|
||||
MOVOU (R8), X1
|
||||
loop17to31:
|
||||
MOVOU (DI), X2
|
||||
PCMPEQB X1,X2
|
||||
PMOVMSKB X2, SI
|
||||
CMPQ SI, $0xffff
|
||||
JE partial_success17to31
|
||||
ADDQ $1,DI
|
||||
CMPQ DI,DX
|
||||
JB loop17to31
|
||||
JMP fail
|
||||
partial_success17to31:
|
||||
MOVOU -16(AX)(DI*1), X3
|
||||
PCMPEQB X0, X3
|
||||
PMOVMSKB X3, SI
|
||||
CMPQ SI, $0xffff
|
||||
JE success
|
||||
ADDQ $1,DI
|
||||
CMPQ DI,DX
|
||||
JB loop17to31
|
||||
JMP fail
|
||||
// We can get here only when AVX2 is enabled and cutoff for indexShortStr is set to 63
|
||||
// So no need to check cpuid
|
||||
_32_or_more:
|
||||
CMPQ AX, $32
|
||||
JA _33_to_63
|
||||
VMOVDQU (R8), Y1
|
||||
LEAQ -31(DI)(DX*1), DX
|
||||
loop32:
|
||||
VMOVDQU (DI), Y2
|
||||
VPCMPEQB Y1, Y2, Y3
|
||||
VPMOVMSKB Y3, SI
|
||||
CMPL SI, $0xffffffff
|
||||
JE success_avx2
|
||||
ADDQ $1,DI
|
||||
CMPQ DI,DX
|
||||
JB loop32
|
||||
JMP fail_avx2
|
||||
_33_to_63:
|
||||
LEAQ 1(DI)(DX*1), DX
|
||||
SUBQ AX, DX
|
||||
VMOVDQU -32(R8)(AX*1), Y0
|
||||
VMOVDQU (R8), Y1
|
||||
loop33to63:
|
||||
VMOVDQU (DI), Y2
|
||||
VPCMPEQB Y1, Y2, Y3
|
||||
VPMOVMSKB Y3, SI
|
||||
CMPL SI, $0xffffffff
|
||||
JE partial_success33to63
|
||||
ADDQ $1,DI
|
||||
CMPQ DI,DX
|
||||
JB loop33to63
|
||||
JMP fail_avx2
|
||||
partial_success33to63:
|
||||
VMOVDQU -32(AX)(DI*1), Y3
|
||||
VPCMPEQB Y0, Y3, Y4
|
||||
VPMOVMSKB Y4, SI
|
||||
CMPL SI, $0xffffffff
|
||||
JE success_avx2
|
||||
ADDQ $1,DI
|
||||
CMPQ DI,DX
|
||||
JB loop33to63
|
||||
fail_avx2:
|
||||
VZEROUPPER
|
||||
fail:
|
||||
MOVQ $-1, (R11)
|
||||
RET
|
||||
success_avx2:
|
||||
VZEROUPPER
|
||||
JMP success
|
||||
sse42:
|
||||
#ifndef hasSSE42
|
||||
CMPB internal∕cpu·X86+const_offsetX86HasSSE42(SB), $1
|
||||
JNE no_sse42
|
||||
#endif
|
||||
CMPQ AX, $12
|
||||
// PCMPESTRI is slower than normal compare,
|
||||
// so using it makes sense only if we advance 4+ bytes per compare
|
||||
// This value was determined experimentally and is the ~same
|
||||
// on Nehalem (first with SSE42) and Haswell.
|
||||
JAE _9_or_more
|
||||
LEAQ 16(R8), SI
|
||||
TESTW $0xff0, SI
|
||||
JEQ no_sse42
|
||||
MOVOU (R8), X1
|
||||
LEAQ -15(DI)(DX*1), SI
|
||||
MOVQ $16, R9
|
||||
SUBQ AX, R9 // We advance by 16-len(sep) each iteration, so precalculate it into R9
|
||||
PCALIGN $16
|
||||
loop_sse42:
|
||||
// 0x0c means: unsigned byte compare (bits 0,1 are 00)
|
||||
// for equality (bits 2,3 are 11)
|
||||
// result is not masked or inverted (bits 4,5 are 00)
|
||||
// and corresponds to first matching byte (bit 6 is 0)
|
||||
PCMPESTRI $0x0c, (DI), X1
|
||||
// CX == 16 means no match,
|
||||
// CX > R9 means partial match at the end of the string,
|
||||
// otherwise sep is at offset CX from X1 start
|
||||
CMPQ CX, R9
|
||||
JBE sse42_success
|
||||
ADDQ R9, DI
|
||||
CMPQ DI, SI
|
||||
JB loop_sse42
|
||||
PCMPESTRI $0x0c, -1(SI), X1
|
||||
CMPQ CX, R9
|
||||
JA fail
|
||||
LEAQ -1(SI), DI
|
||||
sse42_success:
|
||||
ADDQ CX, DI
|
||||
success:
|
||||
SUBQ R10, DI
|
||||
MOVQ DI, (R11)
|
||||
RET
|
||||
23
src/internal/bytealg/index_arm64.go
Normal file
23
src/internal/bytealg/index_arm64.go
Normal file
@@ -0,0 +1,23 @@
|
||||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package bytealg
|
||||
|
||||
// Empirical data shows that using Index can get better
|
||||
// performance when len(s) <= 16.
|
||||
const MaxBruteForce = 16
|
||||
|
||||
// init sets MaxLen (declared elsewhere in the package) to 32 for arm64.
func init() {
|
||||
// Optimize cases where the length of the substring is at most 32 bytes
// (the arm64 assembly documents its accepted needle length as 2 <= len <= 32).
|
||||
MaxLen = 32
|
||||
}
|
||||
|
||||
// Cutover returns how many IndexByte failures are tolerated before the
// caller should switch over to Index, given that n bytes have been
// processed so far.
// See the bytes.Index implementation for details.
func Cutover(n int) int {
	// Allow one failure per 16 bytes scanned, plus a small fixed
	// allowance at the start.
	const slop = 4
	perBytes := n >> 4
	return slop + perBytes
}
|
||||
206
src/internal/bytealg/index_arm64.s
Normal file
206
src/internal/bytealg/index_arm64.s
Normal file
@@ -0,0 +1,206 @@
|
||||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include "go_asm.h"
|
||||
#include "textflag.h"
|
||||
|
||||
// func Index(a, b []byte) int
// Stack-argument entry point: loads both slices into the registers that
// indexbody<> documents as its inputs and branches to it.
TEXT ·Index(SB),NOSPLIT,$0-56
|
||||
// R0 = haystack pointer
MOVD a_base+0(FP), R0
|
||||
// R1 = haystack length
MOVD a_len+8(FP), R1
|
||||
// R2 = needle pointer; b starts at offset 24 (the word at offset 16 is not read here)
MOVD b_base+24(FP), R2
|
||||
// R3 = needle length
MOVD b_len+32(FP), R3
|
||||
// R9 = address of the result slot, written by indexbody<>
MOVD $ret+48(FP), R9
|
||||
// Plain branch, not a call: indexbody<> stores the result through R9 and executes RET itself.
B indexbody<>(SB)
|
||||
|
||||
// func IndexString(a, b string) int
// Stack-argument entry point: loads both strings into the registers that
// indexbody<> documents as its inputs and branches to it.
TEXT ·IndexString(SB),NOSPLIT,$0-40
|
||||
// R0 = haystack pointer
MOVD a_base+0(FP), R0
|
||||
// R1 = haystack length
MOVD a_len+8(FP), R1
|
||||
// R2 = needle pointer; b starts at offset 16, right after a's pointer and length
MOVD b_base+16(FP), R2
|
||||
// R3 = needle length
MOVD b_len+24(FP), R3
|
||||
// R9 = address of the result slot, written by indexbody<>
MOVD $ret+32(FP), R9
|
||||
// Plain branch, not a call: indexbody<> stores the result through R9 and executes RET itself.
B indexbody<>(SB)
|
||||
|
||||
// input:
|
||||
// R0: haystack
|
||||
// R1: length of haystack
|
||||
// R2: needle
|
||||
// R3: length of needle (2 <= len <= 32)
|
||||
// R9: address to put result
|
||||
// indexbody<> is the shared search routine behind Index and IndexString.
// It dispatches on the needle length (R3) to a loop specialized for that
// length and stores the match index, or -1, to the address held in R9.
//
// Every loop fetches the candidate with a post-incrementing load (".P" with
// offset 1), so after that load R0 points one byte past the candidate start.
// This is why the follow-up loads use offsets one smaller than the needle
// offset they check, and why found subtracts R8 (haystack base + 1).
TEXT indexbody<>(SB),NOSPLIT,$0-56
|
||||
// main idea is to load 'sep' into separate register(s)
|
||||
// to avoid repeatedly re-load it again and again
|
||||
// for subsequent substring comparisons
|
||||
// R4 = len(haystack) - len(needle)
SUB R3, R1, R4
|
||||
// R4 contains the start of last substring for comparison
|
||||
ADD R0, R4, R4
|
||||
// R8 = haystack base + 1; subtracted at found to undo the post-increment
ADD $1, R0, R8
|
||||
|
||||
// Needles longer than 8 bytes are handled at greater_8.
CMP $8, R3
|
||||
BHI greater_8
|
||||
// Here len <= 8, so bit 3 of the length is clear exactly when len < 8.
TBZ $3, R3, len_2_7
|
||||
len_8:
|
||||
// R5 contains 8-byte of sep
|
||||
MOVD (R2), R5
|
||||
loop_8:
|
||||
// R6 contains substring for comparison
|
||||
// Stop once R0 has moved past the last candidate start (R4).
CMP R4, R0
|
||||
BHI not_found
|
||||
MOVD.P 1(R0), R6
|
||||
CMP R5, R6
|
||||
BNE loop_8
|
||||
B found
|
||||
len_2_7:
|
||||
// Decode the remaining length bits: bit 2 clear -> 2 or 3,
// bit 1 clear -> 4 or 5, bit 0 clear -> 6, otherwise 7.
TBZ $2, R3, len_2_3
|
||||
TBZ $1, R3, len_4_5
|
||||
TBZ $0, R3, len_6
|
||||
len_7:
|
||||
// R5 and R6 contain 7-byte of sep
|
||||
MOVWU (R2), R5
|
||||
// 1-byte overlap with R5
|
||||
MOVWU 3(R2), R6
|
||||
loop_7:
|
||||
CMP R4, R0
|
||||
BHI not_found
|
||||
// R3 (needle length) is not read again on this path and is reused as scratch.
MOVWU.P 1(R0), R3
|
||||
CMP R5, R3
|
||||
BNE loop_7
|
||||
// R0 is candidate start + 1, so 2(R0) reads candidate bytes 3..6.
MOVWU 2(R0), R3
|
||||
CMP R6, R3
|
||||
BNE loop_7
|
||||
B found
|
||||
len_6:
|
||||
// R5 and R6 contain 6-byte of sep
|
||||
MOVWU (R2), R5
|
||||
MOVHU 4(R2), R6
|
||||
loop_6:
|
||||
CMP R4, R0
|
||||
BHI not_found
|
||||
MOVWU.P 1(R0), R3
|
||||
CMP R5, R3
|
||||
BNE loop_6
|
||||
// R0 is candidate start + 1, so 3(R0) reads candidate bytes 4..5.
MOVHU 3(R0), R3
|
||||
CMP R6, R3
|
||||
BNE loop_6
|
||||
B found
|
||||
len_4_5:
|
||||
TBZ $0, R3, len_4
|
||||
len_5:
|
||||
// R5 and R7 contain 5-byte of sep
|
||||
MOVWU (R2), R5
|
||||
MOVBU 4(R2), R7
|
||||
loop_5:
|
||||
CMP R4, R0
|
||||
BHI not_found
|
||||
MOVWU.P 1(R0), R3
|
||||
CMP R5, R3
|
||||
BNE loop_5
|
||||
// R0 is candidate start + 1, so 3(R0) reads candidate byte 4.
MOVBU 3(R0), R3
|
||||
CMP R7, R3
|
||||
BNE loop_5
|
||||
B found
|
||||
len_4:
|
||||
// R5 contains 4-byte of sep
|
||||
MOVWU (R2), R5
|
||||
loop_4:
|
||||
CMP R4, R0
|
||||
BHI not_found
|
||||
MOVWU.P 1(R0), R6
|
||||
CMP R5, R6
|
||||
BNE loop_4
|
||||
B found
|
||||
len_2_3:
|
||||
TBZ $0, R3, len_2
|
||||
len_3:
|
||||
// R6 and R7 contain 3-byte of sep
|
||||
MOVHU (R2), R6
|
||||
MOVBU 2(R2), R7
|
||||
loop_3:
|
||||
CMP R4, R0
|
||||
BHI not_found
|
||||
MOVHU.P 1(R0), R3
|
||||
CMP R6, R3
|
||||
BNE loop_3
|
||||
// R0 is candidate start + 1, so 1(R0) reads candidate byte 2.
MOVBU 1(R0), R3
|
||||
CMP R7, R3
|
||||
BNE loop_3
|
||||
B found
|
||||
len_2:
|
||||
// R5 contains 2-byte of sep
|
||||
MOVHU (R2), R5
|
||||
loop_2:
|
||||
CMP R4, R0
|
||||
BHI not_found
|
||||
MOVHU.P 1(R0), R6
|
||||
CMP R5, R6
|
||||
BNE loop_2
|
||||
// A match in loop_2 falls through into found.
found:
|
||||
// R0 is one past the match start and R8 is base + 1, so R0 - R8 is the match index.
SUB R8, R0, R0
|
||||
MOVD R0, (R9)
|
||||
RET
|
||||
not_found:
|
||||
// Store -1 into the result slot.
MOVD $-1, R0
|
||||
MOVD R0, (R9)
|
||||
RET
|
||||
greater_8:
|
||||
SUB $9, R3, R11 // len(sep) - 9, offset of R0 for last 8 bytes
|
||||
CMP $16, R3
|
||||
BHI greater_16
|
||||
len_9_16:
|
||||
// The post-increment advances R2 by 8, so the offsets below are relative to sep+8.
MOVD.P 8(R2), R5 // R5 contains the first 8-byte of sep
|
||||
SUB $16, R3, R7 // len(sep) - 16, offset of R2 for last 8 bytes
|
||||
MOVD (R2)(R7), R6 // R6 contains the last 8-byte of sep
|
||||
loop_9_16:
|
||||
// search the first 8 bytes first
|
||||
CMP R4, R0
|
||||
BHI not_found
|
||||
MOVD.P 1(R0), R7
|
||||
CMP R5, R7
|
||||
BNE loop_9_16
|
||||
// R0 is candidate start + 1, so R0 + (len - 9) addresses the candidate's last 8 bytes.
MOVD (R0)(R11), R7
|
||||
CMP R6, R7 // compare the last 8 bytes
|
||||
BNE loop_9_16
|
||||
B found
|
||||
greater_16:
|
||||
CMP $24, R3
|
||||
BHI len_25_32
|
||||
len_17_24:
|
||||
// The post-increment advances R2 by 16, so the offset below is relative to sep+16.
LDP.P 16(R2), (R5, R6) // R5 and R6 contain the first 16-byte of sep
|
||||
SUB $24, R3, R10 // len(sep) - 24
|
||||
MOVD (R2)(R10), R7 // R7 contains the last 8-byte of sep
|
||||
loop_17_24:
|
||||
// search the first 16 bytes first
|
||||
CMP R4, R0
|
||||
BHI not_found
|
||||
MOVD.P 1(R0), R10
|
||||
CMP R5, R10
|
||||
BNE loop_17_24
|
||||
// R0 is candidate start + 1, so 7(R0) reads candidate bytes 8..15.
MOVD 7(R0), R10
|
||||
CMP R6, R10
|
||||
BNE loop_17_24
|
||||
MOVD (R0)(R11), R10
|
||||
CMP R7, R10 // compare the last 8 bytes
|
||||
BNE loop_17_24
|
||||
B found
|
||||
len_25_32:
|
||||
// The two post-incrementing loads advance R2 by 24 in total.
LDP.P 16(R2), (R5, R6)
|
||||
MOVD.P 8(R2), R7 // R5, R6 and R7 contain the first 24-byte of sep
|
||||
SUB $32, R3, R12 // len(sep) - 32
|
||||
MOVD (R2)(R12), R10 // R10 contains the last 8-byte of sep
|
||||
loop_25_32:
|
||||
// search the first 24 bytes first
|
||||
CMP R4, R0
|
||||
BHI not_found
|
||||
MOVD.P 1(R0), R12
|
||||
CMP R5, R12
|
||||
BNE loop_25_32
|
||||
// R0 is candidate start + 1, so 7(R0) and 15(R0) read candidate bytes 8..15 and 16..23.
MOVD 7(R0), R12
|
||||
CMP R6, R12
|
||||
BNE loop_25_32
|
||||
MOVD 15(R0), R12
|
||||
CMP R7, R12
|
||||
BNE loop_25_32
|
||||
MOVD (R0)(R11), R12
|
||||
CMP R10, R12 // compare the last 8 bytes
|
||||
BNE loop_25_32
|
||||
B found
|
||||
29
src/internal/bytealg/index_generic.go
Normal file
29
src/internal/bytealg/index_generic.go
Normal file
@@ -0,0 +1,29 @@
|
||||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build !amd64 && !arm64 && !s390x && !ppc64le && !ppc64
|
||||
|
||||
package bytealg
|
||||
|
||||
const MaxBruteForce = 0
|
||||
|
||||
// Index is the placeholder compiled on architectures that have no native
// implementation (see this file's build constraint). The native contract is
// to return the index of the first instance of b in a, or -1 if b is not
// present in a, requiring 2 <= len(b) <= MaxLen; this stub never returns
// and always panics instead.
func Index(a, b []byte) int {
	const reason = "unimplemented"
	panic(reason)
}
|
||||
|
||||
// IndexString is the placeholder compiled on architectures that have no
// native implementation (see this file's build constraint). The native
// contract is to return the index of the first instance of b in a, or -1 if
// b is not present in a, requiring 2 <= len(b) <= MaxLen; this stub never
// returns and always panics instead.
func IndexString(a, b string) int {
	const reason = "unimplemented"
	panic(reason)
}
|
||||
|
||||
// Cutover is the placeholder compiled on architectures that have no native
// Index. On native architectures it reports the number of IndexByte
// failures to tolerate before switching over to Index, where n is the
// number of bytes processed so far (see the bytes.Index implementation for
// details); this stub never returns and always panics instead.
func Cutover(n int) int {
	const reason = "unimplemented"
	panic(reason)
}
|
||||
19
src/internal/bytealg/index_native.go
Normal file
19
src/internal/bytealg/index_native.go
Normal file
@@ -0,0 +1,19 @@
|
||||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build amd64 || arm64 || s390x || ppc64le || ppc64
|
||||
|
||||
package bytealg
|
||||
|
||||
// Index returns the index of the first instance of b in a, or -1 if b is not present in a.
|
||||
// Requires 2 <= len(b) <= MaxLen.
// There is no Go body here: on the architectures selected by this file's
// build constraint the function is implemented in assembly (for example the
// ·Index symbols in index_arm64.s and index_ppc64x.s).
|
||||
//
|
||||
//go:noescape
|
||||
func Index(a, b []byte) int
|
||||
|
||||
// IndexString returns the index of the first instance of b in a, or -1 if b is not present in a.
|
||||
// Requires 2 <= len(b) <= MaxLen.
// There is no Go body here: on the architectures selected by this file's
// build constraint the function is implemented in assembly (for example the
// ·IndexString symbols in index_arm64.s and index_ppc64x.s).
|
||||
//
|
||||
//go:noescape
|
||||
func IndexString(a, b string) int
|
||||
26
src/internal/bytealg/index_ppc64x.go
Normal file
26
src/internal/bytealg/index_ppc64x.go
Normal file
@@ -0,0 +1,26 @@
|
||||
// Copyright 2021 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build ppc64 || ppc64le
|
||||
|
||||
package bytealg
|
||||
|
||||
import "internal/cpu"
|
||||
|
||||
const MaxBruteForce = 16
|
||||
|
||||
var SupportsPower9 = cpu.PPC64.IsPOWER9
|
||||
|
||||
// init sets MaxLen (declared elsewhere in the package) to 32 for ppc64 and
// ppc64le.
func init() {
|
||||
// The ppc64 assembly documents that it finds separators with 2 <= len <= 32.
MaxLen = 32
|
||||
}
|
||||
|
||||
// Cutover returns how many IndexByte failures are tolerated before the
// caller should switch over to Index, given that n bytes have been
// processed so far.
// See the bytes.Index implementation for details.
func Cutover(n int) int {
	// Allow one failure per 8 bytes scanned; the 16 added before the
	// division gives a small fixed allowance at the start.
	const bias = 16
	return (bias + n) / 8
}
|
||||
841
src/internal/bytealg/index_ppc64x.s
Normal file
841
src/internal/bytealg/index_ppc64x.s
Normal file
@@ -0,0 +1,841 @@
|
||||
// Copyright 2021 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// This is an implementation based on the s390x
|
||||
// implementation.
|
||||
|
||||
// Find a separator with 2 <= len <= 32 within a string.
|
||||
// Separators with lengths of 2, 3 or 4 are handled
|
||||
// specially.
|
||||
|
||||
// This works on power8 and above. The loads and
|
||||
// compares are done in big endian order
|
||||
// since that allows the use of VCLZD, and allows
|
||||
// the same implementation to work on big and little
|
||||
// endian platforms with minimal conditional changes.
|
||||
|
||||
// NOTE: There is a power9 implementation that
|
||||
// improves performance by 10-15% on little
|
||||
// endian for some of the benchmarks.
|
||||
// Unrolled index2to16 loop by 4 on ppc64le/power9
|
||||
// Work is still needed for a big endian
|
||||
// implementation on power9.
|
||||
|
||||
//go:build ppc64 || ppc64le
|
||||
|
||||
#include "go_asm.h"
|
||||
#include "textflag.h"
|
||||
|
||||
// Needed to swap LXVD2X loads to the correct
|
||||
// byte order to work on POWER8.
|
||||
|
||||
#ifdef GOARCH_ppc64
|
||||
DATA byteswap<>+0(SB)/8, $0x0001020304050607
|
||||
DATA byteswap<>+8(SB)/8, $0x08090a0b0c0d0e0f
|
||||
#else
|
||||
DATA byteswap<>+0(SB)/8, $0x0706050403020100
|
||||
DATA byteswap<>+8(SB)/8, $0x0f0e0d0c0b0a0908
|
||||
#endif
|
||||
|
||||
// Load bytes in big endian order. Address
|
||||
// alignment does not need checking.
|
||||
#define VLOADSWAP(base, index, vreg, vsreg) \
|
||||
LXVD2X (base)(index), vsreg; \
|
||||
VPERM vreg, vreg, SWAP, vreg
|
||||
|
||||
GLOBL byteswap<>+0(SB), RODATA, $16
|
||||
|
||||
// func Index(a, b []byte) int (register-based ABIInternal entry point).
// Moves the separator pointer and length into R5/R6, the registers the
// search bodies document as their inputs, then branches to the POWER9 body
// when the CPU feature flag reports it (little-endian builds only) and to
// the POWER8 body otherwise.
TEXT ·Index<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-56
|
||||
// R3 = byte array pointer
|
||||
// R4 = length
|
||||
// NOTE(review): R6/R7 presumably arrive holding b's pointer and length, with
// R5 holding a's unused capacity — confirm against the Go internal ABI.
MOVD R6, R5 // R5 = separator pointer
|
||||
MOVD R7, R6 // R6 = separator length
|
||||
|
||||
// The runtime POWER9 check is only compiled in for ppc64le.
#ifdef GOARCH_ppc64le
|
||||
// Load the HasPOWER9 feature byte; any value other than 1 selects the POWER8 body.
MOVBZ internal∕cpu·PPC64+const_offsetPPC64HasPOWER9(SB), R7
|
||||
CMP R7, $1
|
||||
BNE power8
|
||||
BR indexbodyp9<>(SB)
|
||||
#endif
|
||||
power8:
|
||||
BR indexbody<>(SB)
|
||||
|
||||
// func IndexString(a, b string) int (register-based ABIInternal entry point).
// No register shuffling is done here: per the comments below, the arguments
// already sit in R3-R6 as the search bodies expect. Branches to the POWER9
// body when the CPU feature flag reports it (little-endian builds only) and
// to the POWER8 body otherwise.
TEXT ·IndexString<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-40
|
||||
// R3 = string
|
||||
// R4 = length
|
||||
// R5 = separator pointer
|
||||
// R6 = separator length
|
||||
|
||||
// The runtime POWER9 check is only compiled in for ppc64le.
#ifdef GOARCH_ppc64le
|
||||
// Load the HasPOWER9 feature byte; any value other than 1 selects the POWER8 body.
MOVBZ internal∕cpu·PPC64+const_offsetPPC64HasPOWER9(SB), R7
|
||||
CMP R7, $1
|
||||
BNE power8
|
||||
BR indexbodyp9<>(SB)
|
||||
|
||||
#endif
|
||||
power8:
|
||||
BR indexbody<>(SB)
|
||||
|
||||
// s: string we are searching
|
||||
// sep: string to search for
|
||||
// R3=&s[0], R4=len(s)
|
||||
// R5=&sep[0], R6=len(sep)
|
||||
// R3=return value on exit (index where sep found, or -1)
|
||||
// R7=working addr of string
|
||||
// R16=index value 16
|
||||
// R17=index value 17
|
||||
// R18=index value 18
|
||||
// R19=index value 1
|
||||
// R26=LASTBYTE of string
|
||||
// R27=LASTSTR last start byte to compare with sep
|
||||
// R8, R9 scratch
|
||||
// V0=sep left justified zero fill
|
||||
// CR4=sep length >= 16
|
||||
|
||||
#define SEPMASK V17
|
||||
#define LASTBYTE R26
|
||||
#define LASTSTR R27
|
||||
#define ONES V20
|
||||
#define SWAP V21
|
||||
#define SWAP_ VS53
|
||||
TEXT indexbody<>(SB), NOSPLIT|NOFRAME, $0
|
||||
CMP R6, R4 // Compare lengths
|
||||
BGT notfound // If sep len is > string, notfound
|
||||
ADD R4, R3, LASTBYTE // find last byte addr
|
||||
SUB R6, LASTBYTE, LASTSTR // LAST=&s[len(s)-len(sep)] (last valid start index)
|
||||
CMP R6, $0 // Check sep len
|
||||
BEQ notfound // sep len 0 -- not found
|
||||
MOVD R3, R7 // Copy of string addr
|
||||
MOVD $16, R16 // Index value 16
|
||||
MOVD $17, R17 // Index value 17
|
||||
MOVD $18, R18 // Index value 18
|
||||
MOVD $1, R19 // Index value 1
|
||||
MOVD $byteswap<>+00(SB), R8
|
||||
VSPLTISB $0xFF, ONES // splat all 1s
|
||||
LXVD2X (R8)(R0), SWAP_ // Set up swap string
|
||||
|
||||
CMP R6, $16, CR4 // CR4 for len(sep) >= 16
|
||||
VOR ONES, ONES, SEPMASK // Set up full SEPMASK
|
||||
BGE CR4, loadge16 // Load for len(sep) >= 16
|
||||
SUB R6, R16, R9 // 16-len of sep
|
||||
SLD $3, R9 // Set up for VSLO
|
||||
MTVSRD R9, V9 // Set up for VSLO
|
||||
VSLDOI $8, V9, V9, V9 // Set up for VSLO
|
||||
VSLO ONES, V9, SEPMASK // Mask for separator len(sep) < 16
|
||||
|
||||
loadge16:
|
||||
ANDCC $15, R5, R9 // Find byte offset of sep
|
||||
ADD R9, R6, R10 // Add sep len
|
||||
CMP R10, $16 // Check if sep len+offset > 16
|
||||
BGT sepcross16 // Sep crosses 16 byte boundary
|
||||
|
||||
RLDICR $0, R5, $59, R8 // Adjust addr to 16 byte container
|
||||
VLOADSWAP(R8, R0, V0, V0) // Load 16 bytes @R8 into V0
|
||||
SLD $3, R9 // Set up shift count for VSLO
|
||||
MTVSRD R9, V8 // Set up shift count for VSLO
|
||||
VSLDOI $8, V8, V8, V8
|
||||
VSLO V0, V8, V0 // Shift by start byte
|
||||
|
||||
VAND V0, SEPMASK, V0 // Mask separator (< 16)
|
||||
BR index2plus
|
||||
|
||||
sepcross16:
|
||||
VLOADSWAP(R5, R0, V0, V0) // Load 16 bytes @R5 into V0
|
||||
|
||||
VAND V0, SEPMASK, V0 // mask out separator
|
||||
BLE CR4, index2to16
|
||||
BR index17plus // Handle sep > 16
|
||||
|
||||
index2plus:
|
||||
CMP R6, $2 // Check length of sep
|
||||
BNE index3plus // If not 2, check for 3
|
||||
ADD $16, R7, R9 // Check if next 16 bytes past last
|
||||
CMP R9, LASTBYTE // compare with last
|
||||
BGE index2to16 // 2 <= len(string) <= 16
|
||||
MOVD $0xff00, R21 // Mask for later
|
||||
MTVSRD R21, V25 // Move to Vreg
|
||||
VSPLTH $3, V25, V31 // Splat mask
|
||||
VSPLTH $0, V0, V1 // Splat 1st 2 bytes of sep
|
||||
VSPLTISB $0, V10 // Clear V10
|
||||
|
||||
// First case: 2 byte separator
|
||||
// V1: 2 byte separator splatted
|
||||
// V2: 16 bytes at addr
|
||||
// V4: 16 bytes at addr+1
|
||||
// Compare 2 byte separator at start
|
||||
// and at start+1. Use VSEL to combine
|
||||
// those results to find the first
|
||||
// matching start byte, returning
|
||||
// that value when found. Loop as
|
||||
// long as len(string) > 16
|
||||
index2loop2:
|
||||
VLOADSWAP(R7, R19, V3, V3) // Load 16 bytes @R7+1 into V3
|
||||
|
||||
index2loop:
|
||||
VLOADSWAP(R7, R0, V2, V2) // Load 16 bytes @R7 into V2
|
||||
VCMPEQUH V1, V2, V5 // Search for sep
|
||||
VCMPEQUH V1, V3, V6 // Search for sep offset by 1
|
||||
VSEL V6, V5, V31, V7 // merge even and odd indices
|
||||
VCLZD V7, V18 // find index of first match
|
||||
MFVSRD V18, R25 // get first value
|
||||
CMP R25, $64 // Found if < 64
|
||||
BLT foundR25 // Return byte index where found
|
||||
VSLDOI $8, V18, V18, V18 // Adjust 2nd value
|
||||
MFVSRD V18, R25 // get second value
|
||||
CMP R25, $64 // Found if < 64
|
||||
ADD $64, R25 // Update byte offset
|
||||
BLT foundR25 // Return value
|
||||
ADD $16, R7 // R7+=16 Update string pointer
|
||||
ADD $17, R7, R9 // R9=R7+17 since loop unrolled
|
||||
CMP R9, LASTBYTE // Compare addr+17 against last byte
|
||||
BLT index2loop2 // If < last, continue loop
|
||||
CMP R7, LASTBYTE // Compare addr+16 against last byte
|
||||
BLT index2to16 // If < 16 handle specially
|
||||
VLOADSWAP(R7, R0, V3, V3) // Load 16 bytes @R7 into V3
|
||||
VSLDOI $1, V3, V10, V3 // Shift left by 1 byte
|
||||
BR index2loop
|
||||
|
||||
index3plus:
|
||||
CMP R6, $3 // Check if sep == 3
|
||||
BNE index4plus // If not check larger
|
||||
ADD $19, R7, R9 // Find bytes for use in this loop
|
||||
CMP R9, LASTBYTE // Compare against last byte
|
||||
BGE index2to16 // Remaining string 2<=len<=16
|
||||
MOVD $0xff00, R21 // Set up mask for upcoming loop
|
||||
MTVSRD R21, V25 // Move mask to Vreg
|
||||
VSPLTH $3, V25, V31 // Splat mask
|
||||
VSPLTH $0, V0, V1 // Splat 1st two bytes of sep
|
||||
VSPLTB $2, V0, V8 // Splat 3rd byte of sep
|
||||
|
||||
// Loop to process 3 byte separator.
|
||||
// string[0:16] is in V2
|
||||
// string[2:18] is in V3
|
||||
// sep[0:2] splatted in V1
|
||||
// sep[2] splatted in V8
|
||||
// Load vectors at string, string+1
|
||||
// and string+2. Compare string, string+1
|
||||
// against first 2 bytes of separator
|
||||
// splatted, and string+2 against 3rd
|
||||
// byte splatted. Merge the results with
|
||||
// VSEL to find the first byte of a match.
|
||||
|
||||
// Special handling for last 16 bytes if the
|
||||
// string fits in 16 byte multiple.
|
||||
index3loop2:
|
||||
MOVD $2, R21 // Set up index for 2
|
||||
VSPLTISB $0, V10 // Clear V10
|
||||
VLOADSWAP(R7, R21, V3, V3)// Load 16 bytes @R7+2 into V3
|
||||
VSLDOI $14, V3, V10, V3 // Left justify next 2 bytes
|
||||
|
||||
index3loop:
|
||||
VLOADSWAP(R7, R0, V2, V2) // Load with correct order
|
||||
VSLDOI $1, V2, V3, V4 // string[1:17]
|
||||
VSLDOI $2, V2, V3, V9 // string[2:18]
|
||||
VCMPEQUH V1, V2, V5 // compare hw even indices
|
||||
VCMPEQUH V1, V4, V6 // compare hw odd indices
|
||||
VCMPEQUB V8, V9, V10 // compare 3rd to last byte
|
||||
VSEL V6, V5, V31, V7 // Find 1st matching byte using mask
|
||||
VAND V7, V10, V7 // AND matched bytes with matched 3rd byte
|
||||
VCLZD V7, V18 // Find first nonzero indexes
|
||||
MFVSRD V18, R25 // Move 1st doubleword
|
||||
CMP R25, $64 // If < 64 found
|
||||
BLT foundR25 // Return matching index
|
||||
VSLDOI $8, V18, V18, V18 // Move value
|
||||
MFVSRD V18, R25 // Move 2nd doubleword
|
||||
CMP R25, $64 // If < 64 found
|
||||
ADD $64, R25 // Update byte index
|
||||
BLT foundR25 // Return matching index
|
||||
ADD $16, R7 // R7+=16 string ptr
|
||||
ADD $19, R7, R9 // Number of string bytes for loop
|
||||
CMP R9, LASTBYTE // Compare against last byte of string
|
||||
BLT index3loop2 // If within, continue this loop
|
||||
CMP R7, LASTSTR // Compare against last start byte
|
||||
BLT index2to16 // Process remainder
|
||||
VSPLTISB $0, V3 // Special case for last 16 bytes
|
||||
BR index3loop // Continue this loop
|
||||
|
||||
// Loop to process 4 byte separator
|
||||
// string[0:16] in V2
|
||||
// string[16:19] in V3 (left justified)
|
||||
// sep[0:4] splatted in V1
|
||||
// Set up vectors with strings at offsets
|
||||
// 0, 1, 2, 3 and compare against the 4 byte
|
||||
// separator also splatted. Use VSEL with the
|
||||
// compare results to find the first byte where
|
||||
// a separator match is found.
|
||||
index4plus:
|
||||
CMP R6, $4 // Check if 4 byte separator
|
||||
BNE index5plus // If not next higher
|
||||
ADD $20, R7, R9 // Check string size to load
|
||||
CMP R9, LASTBYTE // Verify string length
|
||||
BGE index2to16 // If not large enough, process remaining
|
||||
MOVD $2, R15 // Set up index
|
||||
|
||||
// Set up masks for use with VSEL
|
||||
MOVD $0xff, R21 // Set up mask 0xff000000ff000000...
|
||||
SLD $24, R21
|
||||
MTVSRD R21, V10
|
||||
VSPLTW $1, V10, V29
|
||||
VSLDOI $2, V29, V29, V30 // Mask 0x0000ff000000ff00...
|
||||
MOVD $0xffff, R21
|
||||
SLD $16, R21
|
||||
MTVSRD R21, V10
|
||||
VSPLTW $1, V10, V31 // Mask 0xffff0000ffff0000...
|
||||
VSPLTW $0, V0, V1 // Splat 1st word of separator
|
||||
|
||||
index4loop:
|
||||
VLOADSWAP(R7, R0, V2, V2) // Load 16 bytes @R7 into V2
|
||||
|
||||
next4:
|
||||
VSPLTISB $0, V10 // Clear
|
||||
MOVD $3, R9 // Number of bytes beyond 16
|
||||
VLOADSWAP(R7, R9, V3, V3) // Load 16 bytes @R7+3 into V3
|
||||
VSLDOI $13, V3, V10, V3 // Shift left last 3 bytes
|
||||
VSLDOI $1, V2, V3, V4 // V4=(V2:V3)<<1
|
||||
VSLDOI $2, V2, V3, V9 // V9=(V2:V3)<<2
|
||||
VSLDOI $3, V2, V3, V10 // V10=(V2:v3)<<3
|
||||
VCMPEQUW V1, V2, V5 // compare index 0, 4, ... with sep
|
||||
VCMPEQUW V1, V4, V6 // compare index 1, 5, ... with sep
|
||||
VCMPEQUW V1, V9, V11 // compare index 2, 6, ... with sep
|
||||
VCMPEQUW V1, V10, V12 // compare index 3, 7, ... with sep
|
||||
VSEL V6, V5, V29, V13 // merge index 0, 1, 4, 5, using mask
|
||||
VSEL V12, V11, V30, V14 // merge index 2, 3, 6, 7, using mask
|
||||
VSEL V14, V13, V31, V7 // final merge
|
||||
VCLZD V7, V18 // Find first index for each half
|
||||
MFVSRD V18, R25 // Isolate value
|
||||
CMP R25, $64 // If < 64, found
|
||||
BLT foundR25 // Return found index
|
||||
VSLDOI $8, V18, V18, V18 // Move for MFVSRD
|
||||
MFVSRD V18, R25 // Isolate other value
|
||||
CMP R25, $64 // If < 64, found
|
||||
ADD $64, R25 // Update index for high doubleword
|
||||
BLT foundR25 // Return found index
|
||||
ADD $16, R7 // R7+=16 for next string
|
||||
ADD $20, R7, R9 // R7+20 for all bytes to load
|
||||
CMP R9, LASTBYTE // Past end? Maybe check for extra?
|
||||
BLT index4loop // If not, continue loop
|
||||
CMP R7, LASTSTR // Check remainder
|
||||
BLE index2to16 // Process remainder
|
||||
BR notfound // Not found
|
||||
|
||||
index5plus:
|
||||
CMP R6, $16 // Check for sep > 16
|
||||
BGT index17plus // Handle large sep
|
||||
|
||||
// Assumption is that the separator is smaller than the string at this point
|
||||
index2to16:
|
||||
CMP R7, LASTSTR // Compare last start byte
|
||||
BGT notfound // last takes len(sep) into account
|
||||
|
||||
ADD $16, R7, R9 // Check for last byte of string
|
||||
CMP R9, LASTBYTE
|
||||
BGT index2to16tail
|
||||
|
||||
// At least 16 bytes of string left
|
||||
// Mask the number of bytes in sep
|
||||
index2to16loop:
|
||||
VLOADSWAP(R7, R0, V1, V1) // Load 16 bytes @R7 into V1
|
||||
|
||||
compare:
|
||||
VAND V1, SEPMASK, V2 // Mask out sep size
|
||||
VCMPEQUBCC V0, V2, V3 // Compare masked string
|
||||
BLT CR6, found // All equal
|
||||
ADD $1, R7 // Update ptr to next byte
|
||||
CMP R7, LASTSTR // Still less than last start byte
|
||||
BGT notfound // Not found
|
||||
ADD $16, R7, R9 // Verify remaining bytes
|
||||
CMP R9, LASTBYTE // At least 16
|
||||
BLT index2to16loop // Try again
|
||||
|
||||
// Less than 16 bytes remaining in string
|
||||
// Separator >= 2
|
||||
index2to16tail:
|
||||
ADD R3, R4, R9 // End of string
|
||||
SUB R7, R9, R9 // Number of bytes left
|
||||
ANDCC $15, R7, R10 // 16 byte offset
|
||||
ADD R10, R9, R11 // offset + len
|
||||
CMP R11, $16 // > 16?
|
||||
BLE short // Does not cross 16 bytes
|
||||
VLOADSWAP(R7, R0, V1, V1) // Load 16 bytes @R7 into V1
|
||||
BR index2to16next // Continue on
|
||||
|
||||
short:
|
||||
RLDICR $0, R7, $59, R9 // Adjust addr to 16 byte container
|
||||
VLOADSWAP(R9, R0, V1, V1)// Load 16 bytes @R9 into V1
|
||||
SLD $3, R10 // Set up shift
|
||||
MTVSRD R10, V8 // Set up shift
|
||||
VSLDOI $8, V8, V8, V8
|
||||
VSLO V1, V8, V1 // Shift by start byte
|
||||
VSPLTISB $0, V25 // Clear for later use
|
||||
|
||||
index2to16next:
|
||||
VAND V1, SEPMASK, V2 // Just compare size of sep
|
||||
VCMPEQUBCC V0, V2, V3 // Compare sep and partial string
|
||||
BLT CR6, found // Found
|
||||
ADD $1, R7 // Not found, try next partial string
|
||||
CMP R7, LASTSTR // Check for end of string
|
||||
BGT notfound // If at end, then not found
|
||||
VSLDOI $1, V1, V25, V1 // Shift string left by 1 byte
|
||||
BR index2to16next // Check the next partial string
|
||||
|
||||
index17plus:
|
||||
CMP R6, $32 // Check if 17 <= len(sep) <= 32
|
||||
BGT index33plus
|
||||
SUB $16, R6, R9 // Extra > 16
|
||||
SLD $56, R9, R10 // Shift to use in VSLO
|
||||
MTVSRD R10, V9 // Set up for VSLO
|
||||
VLOADSWAP(R5, R9, V1, V1)// Load 16 bytes @R5+R9 into V1
|
||||
VSLO V1, V9, V1 // Shift left
|
||||
VSPLTISB $0xff, V7 // Splat 1s
|
||||
VSPLTISB $0, V27 // Splat 0
|
||||
|
||||
index17to32loop:
|
||||
VLOADSWAP(R7, R0, V2, V2) // Load 16 bytes @R7 into V2
|
||||
|
||||
next17:
|
||||
VLOADSWAP(R7, R9, V3, V3) // Load 16 bytes @R7+R9 into V3
|
||||
VSLO V3, V9, V3 // Shift left
|
||||
VCMPEQUB V0, V2, V4 // Compare first 16 bytes
|
||||
VCMPEQUB V1, V3, V5 // Compare extra over 16 bytes
|
||||
VAND V4, V5, V6 // Check if both equal
|
||||
VCMPEQUBCC V6, V7, V8 // All equal?
|
||||
BLT CR6, found // Yes
|
||||
ADD $1, R7 // On to next byte
|
||||
CMP R7, LASTSTR // Check if last start byte
|
||||
BGT notfound // If too high, not found
|
||||
BR index17to32loop // Continue
|
||||
|
||||
notfound:
|
||||
MOVD $-1, R3 // Return -1 if not found
|
||||
RET
|
||||
|
||||
index33plus:
|
||||
MOVD $0, (R0) // Case not implemented
|
||||
RET // Crash before return
|
||||
|
||||
foundR25:
|
||||
SRD $3, R25 // Convert from bits to bytes
|
||||
ADD R25, R7 // Add to current string address
|
||||
SUB R3, R7 // Subtract from start of string
|
||||
MOVD R7, R3 // Return byte where found
|
||||
RET
|
||||
|
||||
found:
|
||||
SUB R3, R7 // Return byte where found
|
||||
MOVD R7, R3
|
||||
RET
|
||||
|
||||
TEXT indexbodyp9<>(SB), NOSPLIT|NOFRAME, $0
|
||||
CMP R6, R4 // Compare lengths
|
||||
BGT notfound // If sep len is > string, notfound
|
||||
ADD R4, R3, LASTBYTE // find last byte addr
|
||||
SUB R6, LASTBYTE, LASTSTR // LAST=&s[len(s)-len(sep)] (last valid start index)
|
||||
CMP R6, $0 // Check sep len
|
||||
BEQ notfound // sep len 0 -- not found
|
||||
MOVD R3, R7 // Copy of string addr
|
||||
#ifndef GOPPC64_power10
|
||||
MOVD $16, R16 // Index value 16
|
||||
MOVD $17, R17 // Index value 17
|
||||
MOVD $18, R18 // Index value 18
|
||||
VSPLTISB $0xFF, ONES // splat all 1s
|
||||
VOR ONES, ONES, SEPMASK // Set up full SEPMASK
|
||||
#else
|
||||
SLD $56, R6, R14 // Set up separator length for LXVLL
|
||||
#endif
|
||||
MOVD $1, R19 // Index value 1
|
||||
CMP R6, $16, CR4 // CR4 for len(sep) >= 16
|
||||
BGE CR4, loadge16 // Load for len(sep) >= 16
|
||||
#ifndef GOPPC64_power10
|
||||
SUB R6, R16, R9 // 16-len of sep
|
||||
SLD $3, R9 // Set up for VSLO
|
||||
MTVSRD R9, V9 // Set up for VSLO
|
||||
VSLDOI $8, V9, V9, V9 // Set up for VSLO
|
||||
VSLO ONES, V9, SEPMASK // Mask for separator len(sep) < 16
|
||||
#endif
|
||||
loadge16:
|
||||
ANDCC $15, R5, R9 // Find byte offset of sep
|
||||
ADD R9, R6, R10 // Add sep len
|
||||
CMP R10, $16 // Check if sep len+offset > 16
|
||||
BGT sepcross16 // Sep crosses 16 byte boundary
|
||||
#ifdef GOPPC64_power10
|
||||
LXVLL R5, R14, V0 // Load separator
|
||||
#else
|
||||
RLDICR $0, R5, $59, R8 // Adjust addr to 16 byte container
|
||||
LXVB16X (R8)(R0), V0 // Load 16 bytes @R8 into V0
|
||||
SLD $3, R9 // Set up shift count for VSLO
|
||||
MTVSRD R9, V8 // Set up shift count for VSLO
|
||||
VSLDOI $8, V8, V8, V8
|
||||
VSLO V0, V8, V0 // Shift by start byte
|
||||
VAND V0, SEPMASK, V0 // Mask separator (< 16)
|
||||
#endif
|
||||
BR index2plus
|
||||
sepcross16:
|
||||
#ifdef GOPPC64_power10
|
||||
LXVLL R5, R14, V0 // Load separator
|
||||
#else
|
||||
LXVB16X (R5)(R0), V0 // Load 16 bytes @R5 into V0
|
||||
VAND V0, SEPMASK, V0 // mask out separator
|
||||
#endif
|
||||
BLE CR4, index2to16
|
||||
BR index17plus // Handle sep > 16
|
||||
|
||||
index2plus:
|
||||
CMP R6, $2 // Check length of sep
|
||||
BNE index3plus // If not 2, check for 3
|
||||
ADD $16, R7, R9 // Check if next 16 bytes past last
|
||||
CMP R9, LASTBYTE // compare with last
|
||||
BGE index2to16 // 2 <= len(string) <= 16
|
||||
MOVD $0xff00, R21 // Mask for later
|
||||
MTVSRD R21, V25 // Move to Vreg
|
||||
VSPLTH $3, V25, V31 // Splat mask
|
||||
VSPLTH $0, V0, V1 // Splat 1st 2 bytes of sep
|
||||
VSPLTISB $0, V10 // Clear V10
|
||||
|
||||
// First case: 2 byte separator
|
||||
// V1: 2 byte separator splatted
|
||||
// V2: 16 bytes at addr
|
||||
// V4: 16 bytes at addr+1
|
||||
// Compare 2 byte separator at start
|
||||
// and at start+1. Use VSEL to combine
|
||||
// those results to find the first
|
||||
// matching start byte, returning
|
||||
// that value when found. Loop as
|
||||
// long as len(string) > 16
|
||||
index2loop2:
|
||||
LXVB16X (R7)(R19), V3 // Load 16 bytes @R7+1 into V3
|
||||
|
||||
index2loop:
|
||||
LXVB16X (R7)(R0), V2 // Load 16 bytes @R7 into V2
|
||||
VCMPEQUH V1, V2, V5 // Search for sep
|
||||
VCMPEQUH V1, V3, V6 // Search for sep offset by 1
|
||||
VSEL V6, V5, V31, V7 // merge even and odd indices
|
||||
VCLZD V7, V18 // find index of first match
|
||||
MFVSRD V18, R25 // get first value
|
||||
CMP R25, $64 // Found if < 64
|
||||
BLT foundR25 // Return byte index where found
|
||||
|
||||
MFVSRLD V18, R25 // get second value
|
||||
CMP R25, $64 // Found if < 64
|
||||
ADD $64, R25 // Update byte offset
|
||||
BLT foundR25 // Return value
|
||||
ADD $16, R7 // R7+=16 Update string pointer
|
||||
ADD $17, R7, R9 // R9=F7+17 since loop unrolled
|
||||
CMP R9, LASTBYTE // Compare addr+17 against last byte
|
||||
BLT index2loop2 // If < last, continue loop
|
||||
CMP R7, LASTBYTE // Compare addr+16 against last byte
|
||||
BLT index2to16 // If < 16 handle specially
|
||||
LXVB16X (R7)(R0), V3 // Load 16 bytes @R7 into V3
|
||||
VSLDOI $1, V3, V10, V3 // Shift left by 1 byte
|
||||
BR index2loop
|
||||
|
||||
index3plus:
|
||||
CMP R6, $3 // Check if sep == 3
|
||||
BNE index4plus // If not check larger
|
||||
ADD $19, R7, R9 // Find bytes for use in this loop
|
||||
CMP R9, LASTBYTE // Compare against last byte
|
||||
BGE index2to16 // Remaining string 2<=len<=16
|
||||
MOVD $0xff00, R21 // Set up mask for upcoming loop
|
||||
MTVSRD R21, V25 // Move mask to Vreg
|
||||
VSPLTH $3, V25, V31 // Splat mask
|
||||
VSPLTH $0, V0, V1 // Splat 1st two bytes of sep
|
||||
VSPLTB $2, V0, V8 // Splat 3rd byte of sep
|
||||
|
||||
// Loop to process 3 byte separator.
|
||||
// string[0:16] is in V2
|
||||
// string[2:18] is in V3
|
||||
// sep[0:2] splatted in V1
|
||||
// sep[2] (3rd byte) splatted in V8
|
||||
// Load vectors at string, string+1
|
||||
// and string+2. Compare string, string+1
|
||||
// against first 2 bytes of separator
|
||||
// splatted, and string+2 against 3rd
|
||||
// byte splatted. Merge the results with
|
||||
// VSEL to find the first byte of a match.
|
||||
|
||||
// Special handling for last 16 bytes if the
|
||||
// string fits in 16 byte multiple.
|
||||
index3loop2:
|
||||
MOVD $2, R21 // Set up index for 2
|
||||
VSPLTISB $0, V10 // Clear V10
|
||||
LXVB16X (R7)(R21), V3 // Load 16 bytes @R7+2 into V3
|
||||
VSLDOI $14, V3, V10, V3 // Left justify next 2 bytes
|
||||
|
||||
index3loop:
|
||||
LXVB16X (R7)(R0), V2 // Load 16 bytes @R7
|
||||
VSLDOI $1, V2, V3, V4 // string[1:17]
|
||||
VSLDOI $2, V2, V3, V9 // string[2:18]
|
||||
VCMPEQUH V1, V2, V5 // compare hw even indices
|
||||
VCMPEQUH V1, V4, V6 // compare hw odd indices
|
||||
VCMPEQUB V8, V9, V10 // compare 3rd to last byte
|
||||
VSEL V6, V5, V31, V7 // Find 1st matching byte using mask
|
||||
VAND V7, V10, V7 // AND matched bytes with matched 3rd byte
|
||||
VCLZD V7, V18 // Find first nonzero indexes
|
||||
MFVSRD V18, R25 // Move 1st doubleword
|
||||
CMP R25, $64 // If < 64 found
|
||||
BLT foundR25 // Return matching index
|
||||
|
||||
MFVSRLD V18, R25 // Move 2nd doubleword
|
||||
CMP R25, $64 // If < 64 found
|
||||
ADD $64, R25 // Update byte index
|
||||
BLT foundR25 // Return matching index
|
||||
ADD $16, R7 // R7+=16 string ptr
|
||||
ADD $19, R7, R9 // Number of string bytes for loop
|
||||
CMP R9, LASTBYTE // Compare against last byte of string
|
||||
BLT index3loop2 // If within, continue this loop
|
||||
CMP R7, LASTSTR // Compare against last start byte
|
||||
BLT index2to16 // Process remainder
|
||||
VSPLTISB $0, V3 // Special case for last 16 bytes
|
||||
BR index3loop // Continue this loop
|
||||
|
||||
// Loop to process 4 byte separator
|
||||
// string[0:16] in V2
|
||||
// string[3:16] in V3
|
||||
// sep[0:4] splatted in V1
|
||||
// Set up vectors with strings at offsets
|
||||
// 0, 1, 2, 3 and compare against the 4 byte
|
||||
// separator also splatted. Use VSEL with the
|
||||
// compare results to find the first byte where
|
||||
// a separator match is found.
|
||||
index4plus:
|
||||
CMP R6, $4 // Check if 4 byte separator
|
||||
BNE index5plus // If not next higher
|
||||
ADD $20, R7, R9 // Check string size to load
|
||||
CMP R9, LASTBYTE // Verify string length
|
||||
BGE index2to16 // If not large enough, process remaining
|
||||
|
||||
// Set up masks for use with VSEL
|
||||
MOVD $0xff, R21 // Set up mask 0xff000000ff000000...
|
||||
SLD $24, R21
|
||||
MTVSRWS R21, V29
|
||||
|
||||
VSLDOI $2, V29, V29, V30 // Mask 0x0000ff000000ff00...
|
||||
MOVD $0xffff, R21
|
||||
SLD $16, R21
|
||||
MTVSRWS R21, V31
|
||||
|
||||
VSPLTW $0, V0, V1 // Splat 1st word of separator
|
||||
|
||||
index4loop:
|
||||
LXVB16X (R7)(R0), V2 // Load 16 bytes @R7 into V2
|
||||
|
||||
next4:
|
||||
VSPLTISB $0, V10 // Clear
|
||||
MOVD $3, R9 // Number of bytes beyond 16
|
||||
LXVB16X (R7)(R9), V3 // Load 16 bytes @R7 into V3
|
||||
VSLDOI $13, V3, V10, V3 // Shift left last 3 bytes
|
||||
VSLDOI $1, V2, V3, V4 // V4=(V2:V3)<<1
|
||||
VSLDOI $2, V2, V3, V9 // V9=(V2:V3)<<2
|
||||
VSLDOI $3, V2, V3, V10 // V10=(V2:v3)<<3
|
||||
VCMPEQUW V1, V2, V5 // compare index 0, 4, ... with sep
|
||||
VCMPEQUW V1, V4, V6 // compare index 1, 5, ... with sep
|
||||
VCMPEQUW V1, V9, V11 // compare index 2, 6, ... with sep
|
||||
VCMPEQUW V1, V10, V12 // compare index 3, 7, ... with sep
|
||||
VSEL V6, V5, V29, V13 // merge index 0, 1, 4, 5, using mask
|
||||
VSEL V12, V11, V30, V14 // merge index 2, 3, 6, 7, using mask
|
||||
VSEL V14, V13, V31, V7 // final merge
|
||||
VCLZD V7, V18 // Find first index for each half
|
||||
MFVSRD V18, R25 // Isolate value
|
||||
CMP R25, $64 // If < 64, found
|
||||
BLT foundR25 // Return found index
|
||||
|
||||
MFVSRLD V18, R25 // Isolate other value
|
||||
CMP R25, $64 // If < 64, found
|
||||
ADD $64, R25 // Update index for high doubleword
|
||||
BLT foundR25 // Return found index
|
||||
ADD $16, R7 // R7+=16 for next string
|
||||
ADD $20, R7, R9 // R+20 for all bytes to load
|
||||
CMP R9, LASTBYTE // Past end? Maybe check for extra?
|
||||
BLT index4loop // If not, continue loop
|
||||
CMP R7, LASTSTR // Check remainder
|
||||
BLE index2to16 // Process remainder
|
||||
BR notfound // Not found
|
||||
|
||||
index5plus:
|
||||
CMP R6, $16 // Check for sep > 16
|
||||
BGT index17plus // Handle large sep
|
||||
|
||||
// Assumption is that the separator is smaller than the string at this point
|
||||
index2to16:
|
||||
CMP R7, LASTSTR // Compare last start byte
|
||||
BGT notfound // last takes len(sep) into account
|
||||
|
||||
ADD $19, R7, R9 // To check 4 indices per iteration, need at least 16+3 bytes
|
||||
CMP R9, LASTBYTE
|
||||
// At least 16 bytes of string left
|
||||
// Mask the number of bytes in sep
|
||||
VSPLTISB $0, V10 // Clear
|
||||
BGT index2to16tail
|
||||
|
||||
#ifdef GOPPC64_power10
|
||||
ADD $3,R7, R17 // Base+3
|
||||
ADD $2,R7, R8 // Base+2
|
||||
ADD $1,R7, R10 // Base+1
|
||||
#else
|
||||
MOVD $3, R17 // Number of bytes beyond 16
|
||||
#endif
|
||||
PCALIGN $16
|
||||
|
||||
index2to16loop:
|
||||
|
||||
#ifdef GOPPC64_power10
|
||||
LXVLL R7, R14, V8 // Load next 16 bytes of string from Base
|
||||
LXVLL R10, R14, V9 // Load next 16 bytes of string from Base+1
|
||||
LXVLL R8, R14, V11 // Load next 16 bytes of string from Base+2
|
||||
LXVLL R17,R14, V12 // Load next 16 bytes of string from Base+3
|
||||
#else
|
||||
LXVB16X (R7)(R0), V1 // Load next 16 bytes of string into V1 from R7
|
||||
LXVB16X (R7)(R17), V5 // Load next 16 bytes of string into V5 from R7+3
|
||||
|
||||
VSLDOI $13, V5, V10, V2 // Shift left last 3 bytes
|
||||
VSLDOI $1, V1, V2, V3 // V3=(V1:V2)<<1
|
||||
VSLDOI $2, V1, V2, V4 // V4=(V1:V2)<<2
|
||||
VAND V1, SEPMASK, V8 // Mask out sep size 0th index
|
||||
VAND V3, SEPMASK, V9 // Mask out sep size 1st index
|
||||
VAND V4, SEPMASK, V11 // Mask out sep size 2nd index
|
||||
VAND V5, SEPMASK, V12 // Mask out sep size 3rd index
|
||||
#endif
|
||||
VCMPEQUBCC V0, V8, V8 // compare masked string
|
||||
BLT CR6, found // All equal while comparing 0th index
|
||||
VCMPEQUBCC V0, V9, V9 // compare masked string
|
||||
BLT CR6, found2 // All equal while comparing 1st index
|
||||
VCMPEQUBCC V0, V11, V11 // compare masked string
|
||||
BLT CR6, found3 // All equal while comparing 2nd index
|
||||
VCMPEQUBCC V0, V12, V12 // compare masked string
|
||||
BLT CR6, found4 // All equal while comparing 3rd index
|
||||
|
||||
ADD $4, R7 // Update ptr to next 4 bytes
|
||||
#ifdef GOPPC64_power10
|
||||
ADD $4, R17 // Update ptr to next 4 bytes
|
||||
ADD $4, R8 // Update ptr to next 4 bytes
|
||||
ADD $4, R10 // Update ptr to next 4 bytes
|
||||
#endif
|
||||
CMP R7, LASTSTR // Still less than last start byte
|
||||
BGT notfound // Not found
|
||||
ADD $19, R7, R9 // Verify remaining bytes
|
||||
CMP R9, LASTBYTE // length of string at least 19
|
||||
BLE index2to16loop // Try again, else do post processing and jump to index2to16next
|
||||
PCALIGN $32
|
||||
// <19 bytes left, post process the remaining string
|
||||
index2to16tail:
|
||||
#ifdef GOPPC64_power10
|
||||
index2to16next_p10:
|
||||
LXVLL R7,R14, V1 // Load 16 bytes @R7 into V1
|
||||
VCMPEQUBCC V1, V0, V3 // Compare sep and partial string
|
||||
BLT CR6, found // Found
|
||||
ADD $1, R7 // Not found, try next partial string
|
||||
CMP R7, LASTSTR // Check for end of string
|
||||
BLE index2to16next_p10 // If at end, then not found
|
||||
BR notfound // go to remainder loop
|
||||
#else
|
||||
ADD R3, R4, R9 // End of string
|
||||
SUB R7, R9, R9 // Number of bytes left
|
||||
ANDCC $15, R7, R10 // 16 byte offset
|
||||
ADD R10, R9, R11 // offset + len
|
||||
CMP R11, $16 // >= 16?
|
||||
BLE short // Does not cross 16 bytes
|
||||
LXVB16X (R7)(R0), V1 // Load 16 bytes @R7 into V1
|
||||
CMP R9, $16 // Post-processing of unrolled loop
|
||||
BLE index2to16next // continue to index2to16next if <= 16 bytes
|
||||
SUB R16, R9, R10 // R9 should be 18 or 17 hence R10 is 1 or 2
|
||||
LXVB16X (R7)(R10), V9
|
||||
CMP R10, $1 // string length is 17, compare 1 more byte
|
||||
BNE extra2 // string length is 18, compare 2 more bytes
|
||||
VSLDOI $15, V9, V10, V25
|
||||
VAND V1, SEPMASK, V2 // Just compare size of sep
|
||||
VCMPEQUBCC V0, V2, V3 // Compare sep and partial string
|
||||
BLT CR6, found // Found
|
||||
ADD $1, R7 // Not found, try next partial string
|
||||
CMP R7, LASTSTR // Check for end of string
|
||||
BGT notfound // If at end, then not found
|
||||
VSLDOI $1, V1, V25, V1 // Shift string left by 1 byte
|
||||
BR index2to16next // go to remainder loop
|
||||
extra2:
|
||||
VSLDOI $14, V9, V10, V25
|
||||
VAND V1, SEPMASK, V2 // Just compare size of sep
|
||||
VCMPEQUBCC V0, V2, V3 // Compare sep and partial string
|
||||
BLT CR6, found // Found
|
||||
ADD $1, R7 // Not found, try next partial string
|
||||
CMP R7, LASTSTR // Check for end of string
|
||||
BGT notfound // If at end, then not found
|
||||
VOR V1, V1, V4 // save remaining string
|
||||
VSLDOI $1, V1, V25, V1 // Shift string left by 1 byte for 17th byte
|
||||
VAND V1, SEPMASK, V2 // Just compare size of sep
|
||||
VCMPEQUBCC V0, V2, V3 // Compare sep and partial string
|
||||
BLT CR6, found // Found
|
||||
ADD $1, R7 // Not found, try next partial string
|
||||
CMP R7, LASTSTR // Check for end of string
|
||||
BGT notfound // If at end, then not found
|
||||
VSLDOI $2, V4, V25, V1 // Shift saved string left by 2 bytes for 18th byte
|
||||
BR index2to16next // Check the remaining partial string in index2to16next
|
||||
|
||||
short:
|
||||
RLDICR $0, R7, $59, R9 // Adjust addr to 16 byte container
|
||||
LXVB16X (R9)(R0), V1 // Load 16 bytes @R9 into V1
|
||||
SLD $3, R10 // Set up shift
|
||||
MTVSRD R10, V8 // Set up shift
|
||||
VSLDOI $8, V8, V8, V8
|
||||
VSLO V1, V8, V1 // Shift by start byte
|
||||
PCALIGN $16
|
||||
index2to16next:
|
||||
VAND V1, SEPMASK, V2 // Just compare size of sep
|
||||
VCMPEQUBCC V0, V2, V3 // Compare sep and partial string
|
||||
BLT CR6, found // Found
|
||||
ADD $1, R7 // Not found, try next partial string
|
||||
CMP R7, LASTSTR // Check for end of string
|
||||
BGT notfound // If at end, then not found
|
||||
VSLDOI $1, V1, V10, V1 // Shift string left by 1 byte
|
||||
BR index2to16next // Check the next partial string
|
||||
#endif // Tail processing if GOPPC64!=power10
|
||||
|
||||
index17plus:
|
||||
CMP R6, $32 // Check if 17 < len(sep) <= 32
|
||||
BGT index33plus
|
||||
SUB $16, R6, R9 // Extra > 16
|
||||
SLD $56, R9, R10 // Shift to use in VSLO
|
||||
MTVSRD R10, V9 // Set up for VSLO
|
||||
LXVB16X (R5)(R9), V1 // Load 16 bytes @R5+R9 into V1
|
||||
VSLO V1, V9, V1 // Shift left
|
||||
VSPLTISB $0xff, V7 // Splat 1s
|
||||
VSPLTISB $0, V27 // Splat 0
|
||||
|
||||
index17to32loop:
|
||||
LXVB16X (R7)(R0), V2 // Load 16 bytes @R7 into V2
|
||||
|
||||
next17:
|
||||
LXVB16X (R7)(R9), V3 // Load 16 bytes @R7+R9 into V3
|
||||
VSLO V3, V9, V3 // Shift left
|
||||
VCMPEQUB V0, V2, V4 // Compare first 16 bytes
|
||||
VCMPEQUB V1, V3, V5 // Compare extra over 16 bytes
|
||||
VAND V4, V5, V6 // Check if both equal
|
||||
VCMPEQUBCC V6, V7, V8 // All equal?
|
||||
BLT CR6, found // Yes
|
||||
ADD $1, R7 // On to next byte
|
||||
CMP R7, LASTSTR // Check if last start byte
|
||||
BGT notfound // If too high, not found
|
||||
BR index17to32loop // Continue
|
||||
|
||||
notfound:
|
||||
MOVD $-1, R3 // Return -1 if not found
|
||||
RET
|
||||
|
||||
index33plus:
|
||||
MOVD $0, (R0) // Case not implemented
|
||||
RET // Crash before return
|
||||
|
||||
foundR25:
|
||||
SRD $3, R25 // Convert from bits to bytes
|
||||
ADD R25, R7 // Add to current string address
|
||||
SUB R3, R7 // Subtract from start of string
|
||||
MOVD R7, R3 // Return byte where found
|
||||
RET
|
||||
found4:
|
||||
ADD $1, R7 // found from unrolled loop at index 3
|
||||
found3:
|
||||
ADD $1, R7 // found from unrolled loop at index 2
|
||||
found2:
|
||||
ADD $1, R7 // found from unrolled loop at index 1
|
||||
found: // found at index 0
|
||||
SUB R3, R7 // Return byte where found
|
||||
MOVD R7, R3
|
||||
RET
|
||||
31
src/internal/bytealg/index_s390x.go
Normal file
31
src/internal/bytealg/index_s390x.go
Normal file
@@ -0,0 +1,31 @@
|
||||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package bytealg
|
||||
|
||||
import "internal/cpu"
|
||||
|
||||
const MaxBruteForce = 64
|
||||
|
||||
func init() {
|
||||
// Note: we're kind of lucky that this flag is available at this point.
|
||||
// The runtime sets HasVX when processing auxv records, and that happens
|
||||
// to happen *before* running the init functions of packages that
|
||||
// the runtime depends on.
|
||||
// TODO: it would really be nicer for internal/cpu to figure out this
|
||||
// flag by itself. Then we wouldn't need to depend on quirks of
|
||||
// early startup initialization order.
|
||||
if cpu.S390X.HasVX {
|
||||
MaxLen = 64
|
||||
}
|
||||
}
|
||||
|
||||
// Cutover returns how many IndexByte failures are tolerated before the
// caller should switch over to Index, given that n bytes have been
// processed so far.
// See the bytes.Index implementation for details.
func Cutover(n int) int {
	const (
		slop           = 16 // head start, worth two failures
		bytesPerFailure = 8 // one tolerated failure per 8 characters
	)
	return (n + slop) / bytesPerFailure
}
|
||||
216
src/internal/bytealg/index_s390x.s
Normal file
216
src/internal/bytealg/index_s390x.s
Normal file
@@ -0,0 +1,216 @@
|
||||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include "go_asm.h"
|
||||
#include "textflag.h"
|
||||
|
||||
// Index loads the base pointer and length of argument a (the data searched)
// and argument b (the separator) into R1-R4, puts the address of the ret
// slot in R5, and branches to indexbody<>, which stores the result there.
// Caller must confirm availability of vx facility before calling.
|
||||
TEXT ·Index(SB),NOSPLIT|NOFRAME,$0-56
|
||||
LMG a_base+0(FP), R1, R2 // R1=&s[0], R2=len(s)
|
||||
LMG b_base+24(FP), R3, R4 // R3=&sep[0], R4=len(sep)
|
||||
MOVD $ret+48(FP), R5 // R5=&ret
|
||||
BR indexbody<>(SB)
|
||||
|
||||
// IndexString is the same as Index except for the argument layout: argument
// b starts at offset 16 and ret at offset 32. It loads R1-R4, puts the
// address of the ret slot in R5, and branches to indexbody<>.
// Caller must confirm availability of vx facility before calling.
|
||||
TEXT ·IndexString(SB),NOSPLIT|NOFRAME,$0-40
|
||||
LMG a_base+0(FP), R1, R2 // R1=&s[0], R2=len(s)
|
||||
LMG b_base+16(FP), R3, R4 // R3=&sep[0], R4=len(sep)
|
||||
MOVD $ret+32(FP), R5 // R5=&ret
|
||||
BR indexbody<>(SB)
|
||||
|
||||
// s: string we are searching
|
||||
// sep: string to search for
|
||||
// R1=&s[0], R2=len(s)
|
||||
// R3=&sep[0], R4=len(sep)
|
||||
// R5=&ret (int)
|
||||
// Caller must confirm availability of vx facility before calling.
//
// The result is stored through R5: -1 at notfound, otherwise R7-R1, where
// R7 is the current candidate position in s. R2 is rewritten to the address
// of the last valid start position. After the initial checks R4 holds
// len(sep)-1 and selects the search loop:
//   1, 2, 3             -> index2loop, index3loop, index4loop (R7 advances by 16)
//   up to 15            -> index2to16 (R7 advances by 1 per compare)
//   up to 31, 47, 63    -> index17to32loop, index33to48loop, index49to64loop
//   above 63            -> index65plus (not implemented)
|
||||
TEXT indexbody<>(SB),NOSPLIT|NOFRAME,$0
|
||||
CMPBGT R4, R2, notfound // len(sep) > len(s)
|
||||
ADD R1, R2
|
||||
SUB R4, R2 // R2=&s[len(s)-len(sep)] (last valid index)
|
||||
CMPBEQ R4, $0, notfound // an empty sep is reported as not found
|
||||
SUB $1, R4 // R4=len(sep)-1 for use as VLL index
|
||||
VLL R4, (R3), V0 // contains first 16 bytes of sep
|
||||
MOVD R1, R7 // R7=current candidate position, starts at &s[0]
|
||||
index2plus:
|
||||
CMPBNE R4, $1, index3plus
|
||||
MOVD $15(R7), R9
|
||||
CMPBGE R9, R2, index2to16
|
||||
VGBM $0xaaaa, V31 // 0xff00ff00ff00ff00...
|
||||
VONE V16
|
||||
VREPH $0, V0, V1
|
||||
// NOTE(review): same compare as three instructions above; only vector
// registers were written in between, so this branch looks redundant.
CMPBGE R9, R2, index2to16
|
||||
index2loop:
|
||||
VL 0(R7), V2 // 16 bytes, even indices
|
||||
VL 1(R7), V4 // 16 bytes, odd indices
|
||||
VCEQH V1, V2, V5 // compare even indices
|
||||
VCEQH V1, V4, V6 // compare odd indices
|
||||
VSEL V5, V6, V31, V7 // merge even and odd indices
|
||||
VFEEBS V16, V7, V17 // find leftmost index, set condition to 1 if found
|
||||
BLT foundV17
|
||||
MOVD $16(R7), R7 // R7+=16
|
||||
ADD $15, R7, R9
|
||||
CMPBLE R9, R2, index2loop // continue if (R7+15) <= R2 (last index to search)
|
||||
CMPBLE R7, R2, index2to16
|
||||
BR notfound
|
||||
|
||||
index3plus:
|
||||
CMPBNE R4, $2, index4plus
|
||||
ADD $15, R7, R9
|
||||
CMPBGE R9, R2, index2to16
|
||||
MOVD $1, R0
|
||||
VGBM $0xaaaa, V31 // 0xff00ff00ff00ff00...
|
||||
VONE V16
|
||||
VREPH $0, V0, V1
|
||||
VREPB $2, V0, V8
|
||||
index3loop:
|
||||
VL (R7), V2 // load 16-bytes into V2
|
||||
VLL R0, 16(R7), V3 // load 2-bytes into V3
|
||||
VSLDB $1, V2, V3, V4 // V4=(V2:V3)<<1
|
||||
VSLDB $2, V2, V3, V9 // V9=(V2:V3)<<2
|
||||
VCEQH V1, V2, V5 // compare 2-byte even indices
|
||||
VCEQH V1, V4, V6 // compare 2-byte odd indices
|
||||
VCEQB V8, V9, V10 // compare last bytes
|
||||
VSEL V5, V6, V31, V7 // merge even and odd indices
|
||||
VN V7, V10, V7 // AND indices with last byte
|
||||
VFEEBS V16, V7, V17 // find leftmost index, set condition to 1 if found
|
||||
BLT foundV17
|
||||
MOVD $16(R7), R7 // R7+=16
|
||||
ADD $15, R7, R9
|
||||
CMPBLE R9, R2, index3loop // continue if (R7+15) <= R2 (last index to search)
|
||||
CMPBLE R7, R2, index2to16
|
||||
BR notfound
|
||||
|
||||
index4plus:
|
||||
CMPBNE R4, $3, index5plus
|
||||
ADD $15, R7, R9
|
||||
CMPBGE R9, R2, index2to16
|
||||
MOVD $2, R0
|
||||
VGBM $0x8888, V29 // 0xff000000ff000000...
|
||||
VGBM $0x2222, V30 // 0x0000ff000000ff00...
|
||||
VGBM $0xcccc, V31 // 0xffff0000ffff0000...
|
||||
VONE V16
|
||||
VREPF $0, V0, V1
|
||||
index4loop:
|
||||
VL (R7), V2 // load 16-bytes into V2
|
||||
VLL R0, 16(R7), V3 // load 3-bytes into V3
|
||||
VSLDB $1, V2, V3, V4 // V4=(V2:V3)<<1
|
||||
VSLDB $2, V2, V3, V9 // V9=(V2:V3)<<2
|
||||
VSLDB $3, V2, V3, V10 // V10=(V2:V3)<<3
|
||||
VCEQF V1, V2, V5 // compare index 0, 4, ...
|
||||
VCEQF V1, V4, V6 // compare index 1, 5, ...
|
||||
VCEQF V1, V9, V11 // compare index 2, 6, ...
|
||||
VCEQF V1, V10, V12 // compare index 3, 7, ...
|
||||
VSEL V5, V6, V29, V13 // merge index 0, 1, 4, 5, ...
|
||||
VSEL V11, V12, V30, V14 // merge index 2, 3, 6, 7, ...
|
||||
VSEL V13, V14, V31, V7 // final merge
|
||||
VFEEBS V16, V7, V17 // find leftmost index, set condition to 1 if found
|
||||
BLT foundV17
|
||||
MOVD $16(R7), R7 // R7+=16
|
||||
ADD $15, R7, R9
|
||||
CMPBLE R9, R2, index4loop // continue if (R7+15) <= R2 (last index to search)
|
||||
CMPBLE R7, R2, index2to16
|
||||
BR notfound
|
||||
|
||||
index5plus:
|
||||
CMPBGT R4, $15, index17plus
|
||||
// Also the fallback for the 2-, 3- and 4-byte loops above when too little
// of s remains for a 16-byte pass.
index2to16:
|
||||
CMPBGT R7, R2, notfound
|
||||
MOVD $1(R7), R8
|
||||
CMPBGT R8, R2, index2to16tail
|
||||
index2to16loop:
|
||||
// unrolled 2x
|
||||
VLL R4, (R7), V1
|
||||
VLL R4, 1(R7), V2
|
||||
VCEQGS V0, V1, V3
|
||||
BEQ found
|
||||
MOVD $1(R7), R7
|
||||
VCEQGS V0, V2, V4
|
||||
BEQ found
|
||||
MOVD $1(R7), R7
|
||||
CMPBLT R7, R2, index2to16loop
|
||||
CMPBGT R7, R2, notfound
|
||||
index2to16tail:
|
||||
VLL R4, (R7), V1
|
||||
VCEQGS V0, V1, V2
|
||||
BEQ found
|
||||
BR notfound
|
||||
|
||||
index17plus:
|
||||
CMPBGT R4, $31, index33plus
|
||||
SUB $16, R4, R0
|
||||
VLL R0, 16(R3), V1
|
||||
VONE V7
|
||||
index17to32loop:
|
||||
VL (R7), V2
|
||||
VLL R0, 16(R7), V3
|
||||
VCEQG V0, V2, V4
|
||||
VCEQG V1, V3, V5
|
||||
VN V4, V5, V6
|
||||
VCEQGS V6, V7, V8
|
||||
BEQ found
|
||||
MOVD $1(R7), R7
|
||||
CMPBLE R7, R2, index17to32loop
|
||||
BR notfound
|
||||
|
||||
index33plus:
|
||||
CMPBGT R4, $47, index49plus
|
||||
SUB $32, R4, R0
|
||||
VL 16(R3), V1
|
||||
VLL R0, 32(R3), V2
|
||||
VONE V11
|
||||
index33to48loop:
|
||||
VL (R7), V3
|
||||
VL 16(R7), V4
|
||||
VLL R0, 32(R7), V5
|
||||
VCEQG V0, V3, V6
|
||||
VCEQG V1, V4, V7
|
||||
VCEQG V2, V5, V8
|
||||
VN V6, V7, V9
|
||||
VN V8, V9, V10
|
||||
VCEQGS V10, V11, V12
|
||||
BEQ found
|
||||
MOVD $1(R7), R7
|
||||
CMPBLE R7, R2, index33to48loop
|
||||
BR notfound
|
||||
|
||||
index49plus:
|
||||
CMPBGT R4, $63, index65plus
|
||||
SUB $48, R4, R0
|
||||
VL 16(R3), V1
|
||||
VL 32(R3), V2
|
||||
VLL R0, 48(R3), V3
|
||||
VONE V15
|
||||
index49to64loop:
|
||||
VL (R7), V4
|
||||
VL 16(R7), V5
|
||||
VL 32(R7), V6
|
||||
VLL R0, 48(R7), V7
|
||||
VCEQG V0, V4, V8
|
||||
VCEQG V1, V5, V9
|
||||
VCEQG V2, V6, V10
|
||||
VCEQG V3, V7, V11
|
||||
VN V8, V9, V12
|
||||
VN V10, V11, V13
|
||||
VN V12, V13, V14
|
||||
VCEQGS V14, V15, V16
|
||||
BEQ found
|
||||
MOVD $1(R7), R7
|
||||
CMPBLE R7, R2, index49to64loop
|
||||
// The 49-64 loop falls through to notfound when it runs out of candidates.
notfound:
|
||||
MOVD $-1, (R5)
|
||||
RET
|
||||
|
||||
index65plus:
|
||||
// not implemented
|
||||
MOVD $0, (R0)
|
||||
RET
|
||||
|
||||
foundV17: // index is in doubleword V17[0]
|
||||
VLGVG $0, V17, R8
|
||||
ADD R8, R7
|
||||
found:
|
||||
SUB R1, R7 // R7=offset of the match from &s[0]
|
||||
MOVD R7, (R5)
|
||||
RET
|
||||
34
src/internal/bytealg/indexbyte_386.s
Normal file
34
src/internal/bytealg/indexbyte_386.s
Normal file
@@ -0,0 +1,34 @@
|
||||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include "go_asm.h"
|
||||
#include "textflag.h"
|
||||
|
||||
// IndexByte scans the b_len bytes at b_base for c using REPN SCASB and
// stores the index of the first match in ret, or -1 when the scan ends
// without a match.
// NOTE(review): with b_len == 0 the scan presumably runs zero iterations and
// ZF is whatever it was on entry; either path then stores -1 (DI == SI on
// the match path) — confirm.
TEXT ·IndexByte(SB),NOSPLIT,$0-20
|
||||
MOVL b_base+0(FP), SI
|
||||
MOVL b_len+4(FP), CX
|
||||
MOVB c+12(FP), AL
|
||||
MOVL SI, DI
|
||||
CLD; REPN; SCASB
|
||||
JZ 3(PC) // match: skip the two-instruction not-found return
|
||||
MOVL $-1, ret+16(FP)
|
||||
RET
|
||||
SUBL SI, DI // DI-SI = number of bytes scanned
|
||||
SUBL $1, DI // the scan leaves DI one past the matching byte
|
||||
MOVL DI, ret+16(FP)
|
||||
RET
|
||||
|
||||
// IndexByteString is the same scan as IndexByte with a different argument
// layout: c is at offset 8 and ret at offset 12. It stores the index of the
// first byte equal to c in ret, or -1 when the scan ends without a match.
TEXT ·IndexByteString(SB),NOSPLIT,$0-16
|
||||
MOVL s_base+0(FP), SI
|
||||
MOVL s_len+4(FP), CX
|
||||
MOVB c+8(FP), AL
|
||||
MOVL SI, DI
|
||||
CLD; REPN; SCASB
|
||||
JZ 3(PC) // match: skip the two-instruction not-found return
|
||||
MOVL $-1, ret+12(FP)
|
||||
RET
|
||||
SUBL SI, DI // DI-SI = number of bytes scanned
|
||||
SUBL $1, DI // the scan leaves DI one past the matching byte
|
||||
MOVL DI, ret+12(FP)
|
||||
RET
|
||||
154
src/internal/bytealg/indexbyte_amd64.s
Normal file
154
src/internal/bytealg/indexbyte_amd64.s
Normal file
@@ -0,0 +1,154 @@
|
||||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build !plan9
|
||||
|
||||
#include "go_asm.h"
|
||||
#include "textflag.h"
|
||||
|
||||
// IndexByte loads b's base pointer and length and the byte c into the
// registers indexbytebody<> expects (SI, BX, AL), passes the address of the
// ret slot in R8, and jumps to it.
TEXT ·IndexByte(SB), NOSPLIT, $0-40
|
||||
MOVQ b_base+0(FP), SI
|
||||
MOVQ b_len+8(FP), BX
|
||||
MOVB c+24(FP), AL
|
||||
LEAQ ret+32(FP), R8
|
||||
JMP indexbytebody<>(SB)
|
||||
|
||||
// IndexByteString loads s's base pointer and length and the byte c into the
// registers indexbytebody<> expects (SI, BX, AL), passes the address of the
// ret slot in R8, and jumps to it.
TEXT ·IndexByteString(SB), NOSPLIT, $0-32
|
||||
MOVQ s_base+0(FP), SI
|
||||
MOVQ s_len+8(FP), BX
|
||||
MOVB c+16(FP), AL
|
||||
LEAQ ret+24(FP), R8
|
||||
JMP indexbytebody<>(SB)
|
||||
|
||||
// input:
|
||||
// SI: data
|
||||
// BX: data len
|
||||
// AL: byte sought
|
||||
// R8: address to put result
//
// The index of the first matching byte, or -1 if there is none, is stored
// through R8. Lengths below 16 go to small; lengths above 32 go to avx2,
// which jumps back to sse when the AVX2 feature byte is not 1 (that check is
// compiled out when hasAVX2 is defined); all other lengths use the 16-byte
// sse loop.
|
||||
TEXT indexbytebody<>(SB), NOSPLIT, $0
|
||||
// Shuffle X0 around so that each byte contains
|
||||
// the character we're looking for.
|
||||
MOVD AX, X0
|
||||
PUNPCKLBW X0, X0
|
||||
PUNPCKLBW X0, X0
|
||||
PSHUFL $0, X0, X0
|
||||
|
||||
CMPQ BX, $16
|
||||
JLT small
|
||||
|
||||
MOVQ SI, DI // DI = address of the chunk currently being examined
|
||||
|
||||
CMPQ BX, $32
|
||||
JA avx2
|
||||
sse:
|
||||
LEAQ -16(SI)(BX*1), AX // AX = address of last 16 bytes
|
||||
JMP sseloopentry
|
||||
|
||||
PCALIGN $16
|
||||
sseloop:
|
||||
// Move the next 16-byte chunk of the data into X1.
|
||||
MOVOU (DI), X1
|
||||
// Compare bytes in X0 to X1.
|
||||
PCMPEQB X0, X1
|
||||
// Take the top bit of each byte in X1 and put the result in DX.
|
||||
PMOVMSKB X1, DX
|
||||
// Find first set bit, if any.
|
||||
BSFL DX, DX
|
||||
JNZ ssesuccess
|
||||
// Advance to next block.
|
||||
ADDQ $16, DI
|
||||
sseloopentry:
|
||||
CMPQ DI, AX
|
||||
JB sseloop
|
||||
|
||||
// Search the last 16-byte chunk. This chunk may overlap with the
|
||||
// chunks we've already searched, but that's ok.
|
||||
MOVQ AX, DI
|
||||
MOVOU (AX), X1
|
||||
PCMPEQB X0, X1
|
||||
PMOVMSKB X1, DX
|
||||
BSFL DX, DX
|
||||
JNZ ssesuccess
|
||||
|
||||
failure:
|
||||
MOVQ $-1, (R8)
|
||||
RET
|
||||
|
||||
// We've found a chunk containing the byte.
|
||||
// The chunk was loaded from DI.
|
||||
// The index of the matching byte in the chunk is DX.
|
||||
// The start of the data is SI.
|
||||
ssesuccess:
|
||||
SUBQ SI, DI // Compute offset of chunk within data.
|
||||
ADDQ DX, DI // Add offset of byte within chunk.
|
||||
MOVQ DI, (R8)
|
||||
RET
|
||||
|
||||
// handle for lengths < 16
|
||||
small:
|
||||
TESTQ BX, BX
|
||||
JEQ failure
|
||||
|
||||
// Check if we'll load across a page boundary.
|
||||
LEAQ 16(SI), AX
|
||||
TESTW $0xff0, AX
|
||||
JEQ endofpage
|
||||
|
||||
MOVOU (SI), X1 // Load data
|
||||
PCMPEQB X0, X1 // Compare target byte with each byte in data.
|
||||
PMOVMSKB X1, DX // Move result bits to integer register.
|
||||
BSFL DX, DX // Find first set bit.
|
||||
JZ failure // No set bit, failure.
|
||||
CMPL DX, BX
|
||||
JAE failure // Match is past end of data.
|
||||
MOVQ DX, (R8)
|
||||
RET
|
||||
|
||||
// The 16 bytes loaded here end exactly at the end of the data, so the mask
// bits for the data are the top BX bits of the 16-bit mask; shifting left by
// BX and then right by 16 drops the bits for the bytes before the data.
endofpage:
|
||||
MOVOU -16(SI)(BX*1), X1 // Load data into the high end of X1.
|
||||
PCMPEQB X0, X1 // Compare target byte with each byte in data.
|
||||
PMOVMSKB X1, DX // Move result bits to integer register.
|
||||
MOVL BX, CX
|
||||
SHLL CX, DX
|
||||
SHRL $16, DX // Shift desired bits down to bottom of register.
|
||||
BSFL DX, DX // Find first set bit.
|
||||
JZ failure // No set bit, failure.
|
||||
MOVQ DX, (R8)
|
||||
RET
|
||||
|
||||
avx2:
|
||||
#ifndef hasAVX2
|
||||
CMPB internal∕cpu·X86+const_offsetX86HasAVX2(SB), $1
|
||||
JNE sse
|
||||
#endif
|
||||
MOVD AX, X0
|
||||
LEAQ -32(SI)(BX*1), R11 // R11 = address of last 32 bytes
|
||||
VPBROADCASTB X0, Y1
|
||||
|
||||
PCALIGN $32
|
||||
avx2_loop:
|
||||
VMOVDQU (DI), Y2
|
||||
VPCMPEQB Y1, Y2, Y3
|
||||
VPTEST Y3, Y3
|
||||
JNZ avx2success
|
||||
ADDQ $32, DI
|
||||
CMPQ DI, R11
|
||||
JLT avx2_loop
|
||||
MOVQ R11, DI // examine the last 32 bytes, which may overlap earlier chunks
|
||||
VMOVDQU (DI), Y2
|
||||
VPCMPEQB Y1, Y2, Y3
|
||||
VPTEST Y3, Y3
|
||||
JNZ avx2success
|
||||
VZEROUPPER
|
||||
MOVQ $-1, (R8)
|
||||
RET
|
||||
|
||||
// Y3 holds the compare result for the chunk loaded from DI.
avx2success:
|
||||
VPMOVMSKB Y3, DX
|
||||
BSFL DX, DX
|
||||
SUBQ SI, DI
|
||||
ADDQ DI, DX
|
||||
MOVQ DX, (R8)
|
||||
VZEROUPPER
|
||||
RET
|
||||
46
src/internal/bytealg/indexbyte_arm.s
Normal file
46
src/internal/bytealg/indexbyte_arm.s
Normal file
@@ -0,0 +1,46 @@
|
||||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include "go_asm.h"
|
||||
#include "textflag.h"
|
||||
|
||||
// IndexByte loads b's base pointer and length and the byte c into R0-R2,
// passes the address of the ret slot in R5, and branches to indexbytebody<>.
TEXT ·IndexByte(SB),NOSPLIT,$0-20
|
||||
MOVW b_base+0(FP), R0
|
||||
MOVW b_len+4(FP), R1
|
||||
MOVBU c+12(FP), R2 // byte to find
|
||||
MOVW $ret+16(FP), R5
|
||||
B indexbytebody<>(SB)
|
||||
|
||||
// IndexByteString loads s's base pointer and length and the byte c into
// R0-R2, passes the address of the ret slot in R5, and branches to
// indexbytebody<>.
TEXT ·IndexByteString(SB),NOSPLIT,$0-16
|
||||
MOVW s_base+0(FP), R0
|
||||
MOVW s_len+4(FP), R1
|
||||
MOVBU c+8(FP), R2 // byte to find
|
||||
MOVW $ret+12(FP), R5
|
||||
B indexbytebody<>(SB)
|
||||
|
||||
// input:
|
||||
// R0: data
|
||||
// R1: data length
|
||||
// R2: byte to find
|
||||
// R5: address to put result
//
// Walks the data one byte at a time and stores through R5 the index of the
// first byte equal to R2, or -1 when the end is reached without a match.
|
||||
TEXT indexbytebody<>(SB),NOSPLIT,$0-0
|
||||
MOVW R0, R4 // store base for later
|
||||
ADD R0, R1 // end
|
||||
|
||||
loop:
|
||||
CMP R0, R1
|
||||
B.EQ notfound // reached end (taken immediately when the length is 0)
|
||||
MOVBU.P 1(R0), R3 // load byte and post-increment R0
|
||||
CMP R2, R3
|
||||
B.NE loop
|
||||
|
||||
SUB $1, R0 // R0 will be one beyond the position we want
|
||||
SUB R4, R0 // remove base
|
||||
MOVW R0, (R5)
|
||||
RET
|
||||
|
||||
notfound:
|
||||
MOVW $-1, R0
|
||||
MOVW R0, (R5)
|
||||
RET
|
||||
126
src/internal/bytealg/indexbyte_arm64.s
Normal file
126
src/internal/bytealg/indexbyte_arm64.s
Normal file
@@ -0,0 +1,126 @@
|
||||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
// IndexByte loads b's base pointer into R0, its length into R2 and the byte
// c into R1, passes the address of the ret slot in R8, and branches to
// indexbytebody<>.
TEXT ·IndexByte(SB),NOSPLIT,$0-40
|
||||
MOVD b_base+0(FP), R0
|
||||
MOVD b_len+8(FP), R2
|
||||
MOVBU c+24(FP), R1
|
||||
MOVD $ret+32(FP), R8
|
||||
B indexbytebody<>(SB)
|
||||
|
||||
// IndexByteString loads s's base pointer into R0, its length into R2 and the
// byte c into R1, passes the address of the ret slot in R8, and branches to
// indexbytebody<>.
TEXT ·IndexByteString(SB),NOSPLIT,$0-32
|
||||
MOVD s_base+0(FP), R0
|
||||
MOVD s_len+8(FP), R2
|
||||
MOVBU c+16(FP), R1
|
||||
MOVD $ret+24(FP), R8
|
||||
B indexbytebody<>(SB)
|
||||
|
||||
// input:
|
||||
// R0: data
|
||||
// R1: byte to search
|
||||
// R2: data len
|
||||
// R8: address to put result
//
// Stores through R8 the offset of the first matching byte from the original
// R0 (saved in R11), or -1 at fail. A zero length goes straight to fail.
|
||||
TEXT indexbytebody<>(SB),NOSPLIT,$0
|
||||
// Core algorithm:
|
||||
// For each 32-byte chunk we calculate a 64-bit syndrome value,
|
||||
// with two bits per byte. For each tuple, bit 0 is set if the
|
||||
// relevant byte matched the requested character and bit 1 is
|
||||
// not used (faster than using a 32bit syndrome). Since the bits
|
||||
// in the syndrome reflect exactly the order in which things occur
|
||||
// in the original string, counting trailing zeros allows to
|
||||
// identify exactly which byte has matched.
|
||||
|
||||
CBZ R2, fail
|
||||
MOVD R0, R11 // keep the original start for the final offset computation
|
||||
// Magic constant 0x40100401 allows us to identify
|
||||
// which lane matches the requested byte.
|
||||
// 0x40100401 = ((1<<0) + (4<<8) + (16<<16) + (64<<24))
|
||||
// Different bytes have different bit masks (i.e: 1, 4, 16, 64)
|
||||
MOVD $0x40100401, R5
|
||||
VMOV R1, V0.B16
|
||||
// Work with aligned 32-byte chunks
|
||||
BIC $0x1f, R0, R3
|
||||
VMOV R5, V5.S4
|
||||
ANDS $0x1f, R0, R9 // R9 = offset of the data within its 32-byte chunk
|
||||
AND $0x1f, R2, R10 // R10 = length modulo 32
|
||||
BEQ loop
|
||||
|
||||
// Input string is not 32-byte aligned. We calculate the
|
||||
// syndrome value for the aligned 32 bytes block containing
|
||||
// the first bytes and mask off the irrelevant part.
|
||||
VLD1.P (R3), [V1.B16, V2.B16]
|
||||
SUB $0x20, R9, R4
|
||||
ADDS R4, R2, R2
|
||||
VCMEQ V0.B16, V1.B16, V3.B16
|
||||
VCMEQ V0.B16, V2.B16, V4.B16
|
||||
VAND V5.B16, V3.B16, V3.B16
|
||||
VAND V5.B16, V4.B16, V4.B16
|
||||
VADDP V4.B16, V3.B16, V6.B16 // 256->128
|
||||
VADDP V6.B16, V6.B16, V6.B16 // 128->64
|
||||
VMOV V6.D[0], R6
|
||||
// Clear the irrelevant lower bits
|
||||
LSL $1, R9, R4
|
||||
LSR R4, R6, R6
|
||||
LSL R4, R6, R6
|
||||
// The first block can also be the last
|
||||
// (presumably the flags tested here are still those set by the ADDS above)
BLS masklast
|
||||
// Have we found something already?
|
||||
CBNZ R6, tail
|
||||
|
||||
loop:
|
||||
VLD1.P (R3), [V1.B16, V2.B16]
|
||||
SUBS $0x20, R2, R2
|
||||
VCMEQ V0.B16, V1.B16, V3.B16
|
||||
VCMEQ V0.B16, V2.B16, V4.B16
|
||||
// If we're out of data we finish regardless of the result
|
||||
BLS end
|
||||
// Use a fast check for the termination condition
|
||||
VORR V4.B16, V3.B16, V6.B16
|
||||
VADDP V6.D2, V6.D2, V6.D2
|
||||
VMOV V6.D[0], R6
|
||||
// We're not out of data, loop if we haven't found the character
|
||||
CBZ R6, loop
|
||||
|
||||
end:
|
||||
// Termination condition found, let's calculate the syndrome value
|
||||
VAND V5.B16, V3.B16, V3.B16
|
||||
VAND V5.B16, V4.B16, V4.B16
|
||||
VADDP V4.B16, V3.B16, V6.B16
|
||||
VADDP V6.B16, V6.B16, V6.B16
|
||||
VMOV V6.D[0], R6
|
||||
// Only do the clear for the last possible block with less than 32 bytes
|
||||
// Condition flags come from SUBS in the loop
|
||||
BHS tail
|
||||
|
||||
masklast:
|
||||
// Clear the irrelevant upper bits
|
||||
ADD R9, R10, R4
|
||||
AND $0x1f, R4, R4
|
||||
SUB $0x20, R4, R4
|
||||
NEG R4<<1, R4
|
||||
LSL R4, R6, R6
|
||||
LSR R4, R6, R6
|
||||
|
||||
tail:
|
||||
// Check that we have found a character
|
||||
CBZ R6, fail
|
||||
// Count the trailing zeros using bit reversing
|
||||
RBIT R6, R6
|
||||
// Compensate the last post-increment
|
||||
SUB $0x20, R3, R3
|
||||
// And count the leading zeros
|
||||
CLZ R6, R6
|
||||
// R6 is twice the offset into the fragment
|
||||
ADD R6>>1, R3, R0
|
||||
// Compute the offset result
|
||||
SUB R11, R0, R0
|
||||
MOVD R0, (R8)
|
||||
RET
|
||||
|
||||
fail:
|
||||
MOVD $-1, R0
|
||||
MOVD R0, (R8)
|
||||
RET
|
||||
29
src/internal/bytealg/indexbyte_generic.go
Normal file
29
src/internal/bytealg/indexbyte_generic.go
Normal file
@@ -0,0 +1,29 @@
|
||||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Avoid IndexByte and IndexByteString on Plan 9 because it uses
|
||||
// SSE instructions on x86 machines, and those are classified as
|
||||
// floating point instructions, which are illegal in a note handler.
|
||||
|
||||
//go:build !386 && (!amd64 || plan9) && !s390x && !arm && !arm64 && !loong64 && !ppc64 && !ppc64le && !mips && !mipsle && !mips64 && !mips64le && !riscv64 && !wasm
|
||||
|
||||
package bytealg
|
||||
|
||||
// IndexByte returns the index of the first element of b equal to c,
// or -1 if no element of b equals c.
func IndexByte(b []byte, c byte) int {
	for i := range b {
		if b[i] != c {
			continue
		}
		return i
	}
	return -1
}
|
||||
|
||||
// IndexByteString returns the index of the first byte of s equal to c,
// or -1 if no byte of s equals c.
func IndexByteString(s string, c byte) int {
	n := len(s)
	i := 0
	// Advance until a match or the end of the string.
	for i < n && s[i] != c {
		i++
	}
	if i == n {
		return -1
	}
	return i
}
|
||||
52
src/internal/bytealg/indexbyte_loong64.s
Normal file
52
src/internal/bytealg/indexbyte_loong64.s
Normal file
@@ -0,0 +1,52 @@
|
||||
// Copyright 2022 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include "go_asm.h"
|
||||
#include "textflag.h"
|
||||
|
||||
// IndexByte scans the b_len bytes starting at b_base one byte at a time
// and leaves in R4 the offset of the first byte equal to the byte in R7,
// or -1 if the end is reached without a match.
TEXT ·IndexByte<ABIInternal>(SB),NOSPLIT,$0-40
	// R4 = b_base
	// R5 = b_len
	// R6 = b_cap (unused)
	// R7 = byte to find
	AND	$0xff, R7	// keep only the low 8 bits; MOVBU below zero-extends the loaded byte
	MOVV	R4, R6		// store base for later
	ADDV	R4, R5		// end
	ADDV	$-1, R4		// start one before base: the loop increments before it loads

	PCALIGN	$16
loop:
	ADDV	$1, R4
	BEQ	R4, R5, notfound	// cursor reached end: no match
	MOVBU	(R4), R8
	BNE	R7, R8, loop

	SUBV	R6, R4		// remove base
	RET

notfound:
	MOVV	$-1, R4
	RET
|
||||
|
||||
// IndexByteString scans the s_len bytes starting at s_base one byte at a
// time and leaves in R4 the offset of the first byte equal to the byte in
// R6, or -1 if the end is reached without a match.
TEXT ·IndexByteString<ABIInternal>(SB),NOSPLIT,$0-32
	// R4 = s_base
	// R5 = s_len
	// R6 = byte to find
	// Mask the byte argument to 8 bits, as IndexByte does: the loop compares
	// the whole register against a zero-extended MOVBU load, so any stray
	// upper bits in R6 would make every comparison fail.
	AND	$0xff, R6
	MOVV	R4, R7		// store base for later
	ADDV	R4, R5		// end
	ADDV	$-1, R4		// start one before base: the loop increments before it loads

	PCALIGN	$16
loop:
	ADDV	$1, R4
	BEQ	R4, R5, notfound	// cursor reached end: no match
	MOVBU	(R4), R8
	BNE	R6, R8, loop

	SUBV	R7, R4		// remove base
	RET

notfound:
	MOVV	$-1, R4
	RET
|
||||
54
src/internal/bytealg/indexbyte_mips64x.s
Normal file
54
src/internal/bytealg/indexbyte_mips64x.s
Normal file
@@ -0,0 +1,54 @@
|
||||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build mips64 || mips64le
|
||||
|
||||
#include "go_asm.h"
|
||||
#include "textflag.h"
|
||||
|
||||
// IndexByte scans b byte by byte and stores in ret the offset of the first
// byte equal to c, or -1 if none is found before the end.
TEXT ·IndexByte(SB),NOSPLIT,$0-40
	MOVV	b_base+0(FP), R1
	MOVV	b_len+8(FP), R2
	MOVBU	c+24(FP), R3	// byte to find
	MOVV	R1, R4		// store base for later
	ADDV	R1, R2		// end
	ADDV	$-1, R1		// start one before base: the loop increments before it loads

loop:
	ADDV	$1, R1
	BEQ	R1, R2, notfound	// cursor reached end: no match
	MOVBU	(R1), R5
	BNE	R3, R5, loop

	SUBV	R4, R1		// remove base
	MOVV	R1, ret+32(FP)
	RET

notfound:
	MOVV	$-1, R1
	MOVV	R1, ret+32(FP)
	RET
|
||||
|
||||
// IndexByteString scans s byte by byte and stores in ret the offset of the
// first byte equal to c, or -1 if none is found before the end.
TEXT ·IndexByteString(SB),NOSPLIT,$0-32
	MOVV	s_base+0(FP), R1
	MOVV	s_len+8(FP), R2
	MOVBU	c+16(FP), R3	// byte to find
	MOVV	R1, R4		// store base for later
	ADDV	R1, R2		// end
	ADDV	$-1, R1		// start one before base: the loop increments before it loads

loop:
	ADDV	$1, R1
	BEQ	R1, R2, notfound	// cursor reached end: no match
	MOVBU	(R1), R5
	BNE	R3, R5, loop

	SUBV	R4, R1		// remove base
	MOVV	R1, ret+24(FP)
	RET

notfound:
	MOVV	$-1, R1
	MOVV	R1, ret+24(FP)
	RET
|
||||
52
src/internal/bytealg/indexbyte_mipsx.s
Normal file
52
src/internal/bytealg/indexbyte_mipsx.s
Normal file
@@ -0,0 +1,52 @@
|
||||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build mips || mipsle
|
||||
|
||||
#include "go_asm.h"
|
||||
#include "textflag.h"
|
||||
|
||||
// IndexByte scans b byte by byte and stores in ret the offset of the first
// byte equal to c, or -1 if none is found before the end.
TEXT ·IndexByte(SB),NOSPLIT,$0-20
	MOVW	b_base+0(FP), R1
	MOVW	b_len+4(FP), R2
	MOVBU	c+12(FP), R3	// byte to find
	ADDU	$1, R1, R4	// store base+1 for later
	ADDU	R1, R2		// end

loop:
	BEQ	R1, R2, notfound	// cursor reached end: no match
	MOVBU	(R1), R5
	ADDU	$1, R1		// advance before the compare, so R1 is one past the byte just tested
	BNE	R3, R5, loop

	SUBU	R4, R1		// R1 will be one beyond the position we want so remove (base+1)
	MOVW	R1, ret+16(FP)
	RET

notfound:
	MOVW	$-1, R1
	MOVW	R1, ret+16(FP)
	RET
|
||||
|
||||
// IndexByteString scans s byte by byte and stores in ret the offset of the
// first byte equal to c, or -1 if none is found before the end.
TEXT ·IndexByteString(SB),NOSPLIT,$0-16
	MOVW	s_base+0(FP), R1
	MOVW	s_len+4(FP), R2
	MOVBU	c+8(FP), R3	// byte to find
	ADDU	$1, R1, R4	// store base+1 for later
	ADDU	R1, R2		// end

loop:
	BEQ	R1, R2, notfound	// cursor reached end: no match
	MOVBU	(R1), R5
	ADDU	$1, R1		// advance before the compare, so R1 is one past the byte just tested
	BNE	R3, R5, loop

	SUBU	R4, R1		// remove (base+1)
	MOVW	R1, ret+12(FP)
	RET

notfound:
	MOVW	$-1, R1
	MOVW	R1, ret+12(FP)
	RET
|
||||
13
src/internal/bytealg/indexbyte_native.go
Normal file
13
src/internal/bytealg/indexbyte_native.go
Normal file
@@ -0,0 +1,13 @@
|
||||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build 386 || (amd64 && !plan9) || s390x || arm || arm64 || loong64 || ppc64 || ppc64le || mips || mipsle || mips64 || mips64le || riscv64 || wasm
|
||||
|
||||
package bytealg
|
||||
|
||||
// IndexByte is declared without a body: on the architectures selected by
// this file's build constraint the implementation is supplied outside Go
// source. The generic Go version returns the index of the first byte in b
// equal to c, or -1 if there is none.
//
//go:noescape
func IndexByte(b []byte, c byte) int
|
||||
|
||||
// IndexByteString is declared without a body: on the architectures selected
// by this file's build constraint the implementation is supplied outside Go
// source. The generic Go version returns the index of the first byte in s
// equal to c, or -1 if there is none.
//
//go:noescape
func IndexByteString(s string, c byte) int
|
||||
314
src/internal/bytealg/indexbyte_ppc64x.s
Normal file
314
src/internal/bytealg/indexbyte_ppc64x.s
Normal file
@@ -0,0 +1,314 @@
|
||||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build ppc64 || ppc64le
|
||||
|
||||
#include "go_asm.h"
|
||||
#include "textflag.h"
|
||||
|
||||
// IndexByte moves the byte argument into R5, where indexbytebody expects
// it, and tail-branches to the shared implementation.
TEXT ·IndexByte<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-40
	// R3 = byte array pointer
	// R4 = length
	MOVD	R6, R5		// R5 = byte
	BR	indexbytebody<>(SB)
|
||||
|
||||
// IndexByteString already has its arguments in the registers indexbytebody
// expects, so it tail-branches straight to the shared implementation.
TEXT ·IndexByteString<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-32
	// R3 = string
	// R4 = length
	// R5 = byte
	BR	indexbytebody<>(SB)
|
||||
|
||||
#ifndef GOPPC64_power9
|
||||
#ifdef GOARCH_ppc64le
|
||||
DATA indexbytevbperm<>+0(SB)/8, $0x3830282018100800
|
||||
DATA indexbytevbperm<>+8(SB)/8, $0x7870686058504840
|
||||
#else
|
||||
DATA indexbytevbperm<>+0(SB)/8, $0x0008101820283038
|
||||
DATA indexbytevbperm<>+8(SB)/8, $0x4048505860687078
|
||||
#endif
|
||||
GLOBL indexbytevbperm<>+0(SB), RODATA, $16
|
||||
#endif
|
||||
|
||||
// Some operations are endian specific; choose the correct opcode based on GOARCH.
|
||||
// Note, _VCZBEBB is only available on power9 and newer.
|
||||
#ifdef GOARCH_ppc64le
|
||||
#define _LDBEX MOVDBR
|
||||
#define _LWBEX MOVWBR
|
||||
#define _LHBEX MOVHBR
|
||||
#define _VCZBEBB VCTZLSBB
|
||||
#else
|
||||
#define _LDBEX MOVD
|
||||
#define _LWBEX MOVW
|
||||
#define _LHBEX MOVH
|
||||
#define _VCZBEBB VCLZLSBB
|
||||
#endif
|
||||
|
||||
// indexbytebody is the shared implementation behind IndexByte and
// IndexByteString. Lengths of 16 or more are searched 16 bytes at a time
// with vector compares; the last (possibly overlapping) 16 bytes are
// re-read rather than handled with a scalar tail. Shorter inputs use
// scalar loads (or LXVLL on power10).
//
// R3 = addr of string
// R4 = len of string
// R5 = byte to find
// On exit:
// R3 = return value
TEXT indexbytebody<>(SB),NOSPLIT|NOFRAME,$0-0
	CMPU	R4,$32		// CR0 = len cmp 32; consumed by BLT cmp16 and by BEQ in cmp32

#ifndef GOPPC64_power9
	// Load VBPERMQ constant to reduce compare into an ordered bit mask.
	MOVD	$indexbytevbperm<>+00(SB),R16
	LXVD2X	(R16),V0	// Set up swap string
#endif

	MTVRD	R5,V1
	VSPLTB	$7,V1,V1	// Replicate byte across V1

	BLT	cmp16		// Jump to the small string case if it's <32 bytes.

	CMP	R4,$64,CR1
	MOVD	$16,R11
	MOVD	R3,R8
	BLT	CR1,cmp32	// Special case for length 32 - 63
	MOVD	$32,R12
	MOVD	$48,R6

	RLDICR  $0,R4,$63-6,R9	// R9 = len &^ 63
	ADD	R3,R9,R9	// R9 = &s[len &^ 63]
	ANDCC	$63,R4		// (len &= 63) cmp 0.

	PCALIGN	$16
loop64:
	LXVD2X	(R0)(R8),V2	// Scan 64 bytes at a time, starting at &s[0]
	VCMPEQUBCC	V2,V1,V6
	BNE	CR6,foundat0	// Match found at R8, jump out

	LXVD2X	(R11)(R8),V2
	VCMPEQUBCC	V2,V1,V6
	BNE	CR6,foundat1	// Match found at R8+16 bytes, jump out

	LXVD2X	(R12)(R8),V2
	VCMPEQUBCC	V2,V1,V6
	BNE	CR6,foundat2	// Match found at R8+32 bytes, jump out

	LXVD2X	(R6)(R8),V2
	VCMPEQUBCC	V2,V1,V6
	BNE	CR6,foundat3	// Match found at R8+48 bytes, jump out

	ADD	$64,R8
	CMPU	R8,R9,CR1
	BNE	CR1,loop64	// R8 != &s[len &^ 63]?

	PCALIGN	$32
	BEQ	notfound	// Is tail length 0? CR0 is set before entering loop64.

	CMP	R4,$32		// Tail length >= 32, use cmp32 path.
	CMP	R4,$16,CR1
	BGE	cmp32

	ADD	R8,R4,R9
	ADD	$-16,R9		// R9 = address of the final 16 bytes of the string
	BLE	CR1,cmp64_tail_gt0

cmp64_tail_gt16:	// Tail length 17 - 31 (32 and above took the cmp32 path)
	LXVD2X	(R0)(R8),V2
	VCMPEQUBCC	V2,V1,V6
	BNE	CR6,foundat0

cmp64_tail_gt0:	// Tail length 1 - 16
	MOVD	R9,R8		// foundat0 computes the index from R8
	LXVD2X	(R0)(R9),V2
	VCMPEQUBCC	V2,V1,V6
	BNE	CR6,foundat0

	BR	notfound

cmp32:	// Length 32 - 63

	// Bytes 0 - 15
	LXVD2X	(R0)(R8),V2
	VCMPEQUBCC	V2,V1,V6
	BNE	CR6,foundat0

	// Bytes 16 - 31
	LXVD2X	(R8)(R11),V2
	VCMPEQUBCC	V2,V1,V6
	BNE	CR6,foundat1	// Match found at R8+16 bytes, jump out

	BEQ	notfound	// Is length <= 32? (CR0 holds this comparison on entry to cmp32)
	CMP	R4,$48

	ADD	R4,R8,R9	// Compute &s[len(s)-16]
	ADD	$32,R8,R8
	ADD	$-16,R9,R9
	ISEL	CR0GT,R8,R9,R8	// R8 = len(s) <= 48 ? R9 : R8

	// Bytes 32 - 47, or the final 16 bytes when len(s) <= 48
	LXVD2X	(R0)(R8),V2
	VCMPEQUBCC	V2,V1,V6
	BNE	CR6,foundat0	// match found in the 16 bytes at R8 (already advanced), jump out

	BLE	notfound	// len(s) <= 48: the load above was already the final check

	// Final 16 bytes (may overlap bytes 32 - 47)
	MOVD	R9,R8		// R9 holds the final check.
	LXVD2X	(R0)(R9),V2
	VCMPEQUBCC	V2,V1,V6
	BNE	CR6,foundat0	// Match found in the final 16 bytes, jump out

	BR	notfound

// If ISA 3.0 instructions are unavailable, we need to account for the extra 16 added by CNTLZW.
#ifndef GOPPC64_power9
#define ADJUST_FOR_CNTLZW -16
#else
#define ADJUST_FOR_CNTLZW 0
#endif

// Now, find the index of the 16B vector the match was discovered in. If CNTLZW is used
// to determine the offset into the 16B vector, it will overcount by 16. Account for it here.
foundat3:
	SUB	R3,R8,R3
	ADD	$48+ADJUST_FOR_CNTLZW,R3
	BR	vfound
foundat2:
	SUB	R3,R8,R3
	ADD	$32+ADJUST_FOR_CNTLZW,R3
	BR	vfound
foundat1:
	SUB	R3,R8,R3
	ADD	$16+ADJUST_FOR_CNTLZW,R3
	BR	vfound
foundat0:
	SUB	R3,R8,R3
	ADD	$0+ADJUST_FOR_CNTLZW,R3
vfound:
	// Map equal values into a 16 bit value with earlier matches setting higher bits.
#ifndef GOPPC64_power9
	VBPERMQ	V6,V0,V6
	MFVRD	V6,R4
	CNTLZW	R4,R4
#else
#ifdef GOARCH_ppc64le
	// Put the value back into LE ordering by swapping doublewords.
	XXPERMDI	V6,V6,$2,V6
#endif
	_VCZBEBB	V6,R4
#endif
	ADD	R3,R4,R3	// vector base offset + offset of the match within the vector
	RET

cmp16:	// Length 16 - 31
	CMPU	R4,$16
	ADD	R4,R3,R9	// R9 = &s[len(s)]
	BLT	cmp8

	ADD	$-16,R9,R9	// &s[len(s)-16]

	// Bytes 0 - 15
	LXVD2X	(R0)(R3),V2
	VCMPEQUBCC	V2,V1,V6
	MOVD	R3,R8
	BNE	CR6,foundat0	// Match found in bytes 0 - 15, jump out

	BEQ	notfound	// CR0 from CMPU R4,$16 above: length is exactly 16

	// Final 16 bytes (may overlap bytes 0 - 15)
	MOVD	R9,R8		// R9 holds the final check.
	LXVD2X	(R0)(R9),V2
	VCMPEQUBCC	V2,V1,V6
	BNE	CR6,foundat0	// Match found in the final 16 bytes, jump out

	BR	notfound


cmp8:	// Length < 16; the non-power10 path handles 8 - 15 here and branches to cmp4 for less
#ifdef GOPPC64_power10
	// Load all the bytes into a single VSR in BE order.
	SLD	$56,R4,R5
	LXVLL	R3,R5,V2
	// Compare and count the number which don't match.
	VCMPEQUB	V2,V1,V6
	VCLZLSBB	V6,R3
	// If count is the number of bytes, or more. No matches are found.
	CMPU	R3,R4
	MOVD	$-1,R5
	// Otherwise, the count is the index of the first match.
	ISEL	CR0LT,R3,R5,R3
	RET
#else
	RLDIMI	$8,R5,$48,R5	// Replicating the byte across the register.
	RLDIMI	$16,R5,$32,R5
	RLDIMI	$32,R5,$0,R5
	CMPU	R4,$8
	BLT	cmp4
	MOVD	$-8,R11
	ADD	$-8,R4,R4	// R4 = offset of the final 8 bytes

	_LDBEX	(R0)(R3),R10	// first 8 bytes
	_LDBEX	(R11)(R9),R11	// final 8 bytes (R9 = &s[len(s)])
	CMPB	R10,R5,R10
	CMPB	R11,R5,R11
	CMPU	R10,$0
	CMPU	R11,$0,CR1
	CNTLZD	R10,R10
	CNTLZD	R11,R11
	SRD	$3,R10,R3	// leading zero bits / 8 = byte index of the first match
	SRD	$3,R11,R11
	BNE	found		// match in the first 8 bytes; R3 already holds its index

	ADD	R4,R11,R4
	MOVD	$-1,R3
	ISEL	CR1EQ,R3,R4,R3	// no match in the final 8 bytes either ? -1 : index
	RET

cmp4:	// Length 4 - 7
	CMPU	R4,$4
	BLT	cmp2
	MOVD	$-4,R11
	ADD	$-4,R4,R4	// R4 = offset of the final 4 bytes

	_LWBEX	(R0)(R3),R10	// first 4 bytes
	_LWBEX	(R11)(R9),R11	// final 4 bytes
	CMPB	R10,R5,R10
	CMPB	R11,R5,R11
	CNTLZW	R10,R10
	CNTLZW	R11,R11
	CMPU	R10,$32		// a count of 32 means no byte matched
	CMPU	R11,$32,CR1
	SRD	$3,R10,R3
	SRD	$3,R11,R11
	BNE	found		// match in the first 4 bytes; R3 already holds its index

	ADD	R4,R11,R4
	MOVD	$-1,R3
	ISEL	CR1EQ,R3,R4,R3	// no match in the final 4 bytes either ? -1 : index
	RET

cmp2:	// Length 2 - 3
	CMPU	R4,$2
	BLT	cmp1

	_LHBEX	(R0)(R3),R10
	CMPB	R10,R5,R10
	SLDCC	$48,R10,R10	// move the two result bytes to the top; sets CR0 for BNE below
	CNTLZD	R10,R10
	SRD	$3,R10,R3
	BNE	found

cmp1:	// Length 0 - 1, or the last byte of a length-3 input that fell through from cmp2
	MOVD	$-1,R3
	ANDCC	$1,R4,R31
	BEQ	found		// even length: no byte left to check, return -1

	MOVBZ	-1(R9),R10	// last byte of the string
	CMPB	R10,R5,R10
	ANDCC	$1,R10
	ADD	$-1,R4		// index of the last byte
	ISEL	CR0EQ,R3,R4,R3	// last byte differs ? -1 : len-1

found:
	RET
#endif

notfound:
	MOVD	$-1,R3
	RET
|
||||
|
||||
51
src/internal/bytealg/indexbyte_riscv64.s
Normal file
51
src/internal/bytealg/indexbyte_riscv64.s
Normal file
@@ -0,0 +1,51 @@
|
||||
// Copyright 2019 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include "go_asm.h"
|
||||
#include "textflag.h"
|
||||
|
||||
// IndexByte scans the b_len bytes starting at b_base one byte at a time
// and leaves in X10 the offset of the first byte equal to the byte in X13,
// or -1 if the end is reached without a match.
TEXT ·IndexByte<ABIInternal>(SB),NOSPLIT,$0-40
	// X10 = b_base
	// X11 = b_len
	// X12 = b_cap (unused)
	// X13 = byte to find
	AND	$0xff, X13	// keep only the low 8 bits; MOVBU below zero-extends the loaded byte
	MOV	X10, X12		// store base for later
	ADD	X10, X11		// end
	SUB	$1, X10		// start one before base: the loop increments before it loads

loop:
	ADD	$1, X10
	BEQ	X10, X11, notfound	// cursor reached end: no match
	MOVBU	(X10), X14
	BNE	X13, X14, loop

	SUB	X12, X10		// remove base
	RET

notfound:
	MOV	$-1, X10
	RET
|
||||
|
||||
// IndexByteString scans the s_len bytes starting at s_base one byte at a
// time and leaves in X10 the offset of the first byte equal to the byte in
// X12, or -1 if the end is reached without a match.
TEXT ·IndexByteString<ABIInternal>(SB),NOSPLIT,$0-32
	// X10 = s_base
	// X11 = s_len
	// X12 = byte to find
	AND	$0xff, X12	// keep only the low 8 bits; MOVBU below zero-extends the loaded byte
	MOV	X10, X13		// store base for later
	ADD	X10, X11		// end
	SUB	$1, X10		// start one before base: the loop increments before it loads

loop:
	ADD	$1, X10
	BEQ	X10, X11, notfound	// cursor reached end: no match
	MOVBU	(X10), X14
	BNE	X12, X14, loop

	SUB	X13, X10		// remove base
	RET

notfound:
	MOV	$-1, X10
	RET
|
||||
108
src/internal/bytealg/indexbyte_s390x.s
Normal file
108
src/internal/bytealg/indexbyte_s390x.s
Normal file
@@ -0,0 +1,108 @@
|
||||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include "go_asm.h"
|
||||
#include "textflag.h"
|
||||
|
||||
// IndexByte loads its stack arguments into the registers indexbytebody
// expects and tail-branches to it; indexbytebody writes the result
// through the address in R2.
TEXT ·IndexByte(SB),NOSPLIT|NOFRAME,$0-40
	MOVD	b_base+0(FP), R3// b_base => R3
	MOVD	b_len+8(FP), R4 // b_len => R4
	MOVBZ	c+24(FP), R5    // c => R5
	MOVD	$ret+32(FP), R2 // &ret => R2
	BR	indexbytebody<>(SB)
|
||||
|
||||
// IndexByteString loads its stack arguments into the registers
// indexbytebody expects and tail-branches to it; indexbytebody writes the
// result through the address in R2.
TEXT ·IndexByteString(SB),NOSPLIT|NOFRAME,$0-32
	MOVD	s_base+0(FP), R3// s_base => R3
	MOVD	s_len+8(FP), R4 // s_len => R4
	MOVBZ	c+16(FP), R5    // c => R5
	MOVD	$ret+24(FP), R2 // &ret => R2
	BR	indexbytebody<>(SB)
|
||||
|
||||
// indexbytebody stores at 0(R2) the offset of the first byte equal to c,
// or -1 if there is none. Inputs shorter than 16 bytes use a byte loop;
// longer inputs use the vector facility when internal/cpu reports it,
// and the SRST instruction otherwise.
//
// input:
// R3: s
// R4: s_len
// R5: c -- byte sought
// R2: &ret -- address to put index into
TEXT indexbytebody<>(SB),NOSPLIT|NOFRAME,$0
	CMPBEQ	R4, $0, notfound
	MOVD	R3, R6          // store base for later
	ADD	R3, R4, R8      // the address after the end of the string
	//if the length is small, use loop; otherwise, use vector or srst search
	CMPBGE	R4, $16, large

	// Byte loop over [R3, R8). Also entered from vectorloop to finish the
	// bytes left after the last full 16-byte chunk.
residual:
	CMPBEQ	R3, R8, notfound
	MOVBZ	0(R3), R7
	LA	1(R3), R3       // advance before the compare, so R3 is one past the byte just tested
	CMPBNE	R7, R5, residual

found:
	SUB	R6, R3          // remove base
	SUB	$1, R3          // undo the post-increment from the byte loop
	MOVD	R3, 0(R2)
	RET

notfound:
	MOVD	$-1, 0(R2)
	RET

large:
	MOVBZ	internal∕cpu·S390X+const_offsetS390xHasVX(SB), R1
	CMPBNE	R1, $0, vectorimpl

srstimpl:                       // no vector facility
	MOVBZ	R5, R0          // c needs to be in R0, leave until last minute as currently R0 is expected to be 0
srstloop:
	WORD	$0xB25E0083     // srst %r8, %r3 (search the range [R3, R8))
	BVS	srstloop        // interrupted - continue
	BGT	notfoundr0
foundr0:
	XOR	R0, R0          // reset R0
	SUB	R6, R8          // remove base
	MOVD	R8, 0(R2)
	RET
notfoundr0:
	XOR	R0, R0          // reset R0
	MOVD	$-1, 0(R2)
	RET

vectorimpl:
	//if the address is not 16byte aligned, use loop for the header
	MOVD	R3, R8
	AND	$15, R8
	CMPBGT	R8, $0, notaligned

aligned:
	ADD	R6, R4, R8      // R8 = base + len, the address after the end of the string
	MOVD	R8, R7
	AND	$-16, R7        // R7 = end address rounded down to a multiple of 16
	// replicate c across V17
	VLVGB	$0, R5, V19
	VREPB	$0, V19, V17

vectorloop:
	CMPBGE	R3, R7, residual // fewer than 16 bytes left: finish in the byte loop
	VL	0(R3), V16    // load string to be searched into V16
	ADD	$16, R3
	VFEEBS	V16, V17, V18 // search V17 in V16 and set conditional code accordingly
	BVS	vectorloop

	// when vector search found c in the string
	VLGVB	$7, V18, R7   // load 7th element of V18 containing index into R7
	SUB	$16, R3       // back to the start of the chunk that matched
	SUB	R6, R3        // remove base
	ADD	R3, R7        // chunk offset + index within the chunk
	MOVD	R7, 0(R2)
	RET

notaligned:
	MOVD	R3, R8
	AND	$-16, R8
	ADD	$16, R8         // R8 = next 16-byte boundary after R3
notalignedloop:
	CMPBEQ	R3, R8, aligned
	MOVBZ	0(R3), R7
	LA	1(R3), R3
	CMPBNE	R7, R5, notalignedloop
	BR	found
|
||||
195
src/internal/bytealg/indexbyte_wasm.s
Normal file
195
src/internal/bytealg/indexbyte_wasm.s
Normal file
@@ -0,0 +1,195 @@
|
||||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include "go_asm.h"
|
||||
#include "textflag.h"
|
||||
|
||||
// IndexByte calls memchr with (b_base, c, b_len) and converts the address
// it returns into an index: -1 when memchr returned 0, otherwise the
// returned address minus b_base.
TEXT ·IndexByte(SB), NOSPLIT, $0-40
	I64Load b_base+0(FP)
	I32WrapI64
	I32Load8U c+24(FP)
	I64Load b_len+8(FP)
	I32WrapI64
	Call memchr<>(SB)
	I64ExtendI32U
	Set R0		// R0 = memchr result

	Get SP		// address operand for the I64Store of ret below
	I64Const $-1
	Get R0
	I64Load b_base+0(FP)
	I64Sub
	Get R0
	I64Eqz $0
	Select		// R0 == 0 ? -1 : R0 - b_base
	I64Store ret+32(FP)

	RET
|
||||
|
||||
// IndexByteString calls memchr with (s_base, c, s_len) and converts the
// address it returns into an index: -1 when memchr returned 0, otherwise
// the returned address minus s_base.
TEXT ·IndexByteString(SB), NOSPLIT, $0-32
	Get SP		// address operand for the I64Store of ret below
	I64Load s_base+0(FP)
	I32WrapI64
	I32Load8U c+16(FP)
	I64Load s_len+8(FP)
	I32WrapI64
	Call memchr<>(SB)
	I64ExtendI32U
	Set R0		// R0 = memchr result

	I64Const $-1
	Get R0
	I64Load s_base+0(FP)
	I64Sub
	Get R0
	I64Eqz $0
	Select		// R0 == 0 ? -1 : R0 - s_base
	I64Store ret+24(FP)

	RET
|
||||
|
||||
// initially compiled with emscripten and then modified over time.
// params:
//   R0: s
//   R1: c
//   R2: len
// ret: address of the first byte equal to c, or 0 if there is none
// (both callers in this file subtract the base address to get an index
// and map 0 to -1).
//
// The search runs in three phases: a byte loop until s is 4-byte aligned,
// a word loop that skips 4 bytes at a time while no byte of the word can
// match, and a final byte loop over what is left.
TEXT memchr<>(SB), NOSPLIT, $0
	Get R1
	Set R4		// R4 = c
	Block
		Block
			// Phase 1: run only if len != 0 and s is not 4-byte aligned.
			Get R2
			I32Const $0
			I32Ne
			Tee R3		// R3 = (len != 0)
			Get R0
			I32Const $3
			I32And
			I32Const $0
			I32Ne
			I32And
			If
				Loop
					Get R0
					I32Load8U $0
					Get R1
					I32Eq
					BrIf $2		// byte matches: leave the alignment phase
					Get R2
					I32Const $-1
					I32Add
					Tee R2		// len--
					I32Const $0
					I32Ne
					Tee R3		// R3 = (len != 0)
					Get R0
					I32Const $1
					I32Add
					Tee R0		// s++
					I32Const $3
					I32And
					I32Const $0
					I32Ne
					I32And
					BrIf $0		// continue while bytes remain and s is still unaligned
				End
			End
			Get R3
			BrIf $0		// bytes remain: continue with the checks below
			I32Const $0
			Set R1		// nothing left: R1 = 0 makes the final Select return 0
			Br $1
		End
		Get R0
		I32Load8U $0
		Get R4
		Tee R3		// R3 = c, used by the final byte loop
		I32Eq
		If
			// Current byte matches; R1 = remaining len (non-zero) selects R0 as the result.
			Get R2
			Set R1
		Else
			Get R4
			I32Const $16843009	// 0x01010101
			I32Mul
			Set R4		// R4 = c replicated into all four bytes
			Block
				Block
					Get R2
					I32Const $3
					I32GtU
					If
						Get R2
						Set R1		// R1 = remaining len
						// Phase 2: word at a time. x = word ^ R4 has a zero byte
						// exactly where the word holds c; the expression below is
						// (~x & 0x80808080) & (x - 0x01010101), the usual
						// zero-byte test.
						Loop
							Get R0
							I32Load $0
							Get R4
							I32Xor
							Tee R2
							I32Const $-2139062144	// 0x80808080
							I32And
							I32Const $-2139062144	// 0x80808080
							I32Xor
							Get R2
							I32Const $-16843009	// -0x01010101
							I32Add
							I32And
							I32Eqz
							If
								// No candidate byte in this word: skip it.
								Get R0
								I32Const $4
								I32Add
								Set R0
								Get R1
								I32Const $-4
								I32Add
								Tee R1
								I32Const $3
								I32GtU
								BrIf $1		// more than 3 bytes left: next word
								Br $3
							End
						End
					Else
						Get R2
						Set R1
						Br $1
					End
					Br $1
				End
				Get R1
				I32Eqz
				If
					I32Const $0
					Set R1
					Br $3
				End
			End
			// Phase 3: byte loop over the remaining R1 bytes.
			Loop
				Get R0
				I32Load8U $0
				Get R3
				I32Eq
				BrIf $2		// match: exit with R1 still non-zero
				Get R0
				I32Const $1
				I32Add
				Set R0
				Get R1
				I32Const $-1
				I32Add
				Tee R1
				BrIf $0		// continue while bytes remain
				I32Const $0
				Set R1
			End
		End
	End
	Get R0
	I32Const $0
	Get R1
	Select		// R1 != 0 ? R0 : 0
	Return
|
||||
23
src/internal/bytealg/lastindexbyte_generic.go
Normal file
23
src/internal/bytealg/lastindexbyte_generic.go
Normal file
@@ -0,0 +1,23 @@
|
||||
// Copyright 2023 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package bytealg
|
||||
|
||||
// LastIndexByte returns the index of the last element of s equal to c,
// or -1 if no element of s equals c.
func LastIndexByte(s []byte, c byte) int {
	// end is one past the candidate position, so the loop stops at zero.
	for end := len(s); end > 0; end-- {
		if s[end-1] == c {
			return end - 1
		}
	}
	return -1
}
|
||||
|
||||
// LastIndexByteString returns the index of the last byte of s equal to c,
// or -1 if no byte of s equals c.
func LastIndexByteString(s string, c byte) int {
	// end is one past the candidate position, so the loop stops at zero.
	for end := len(s); end > 0; end-- {
		if s[end-1] == c {
			return end - 1
		}
	}
	return -1
}
|
||||
149
src/internal/byteorder/byteorder.go
Normal file
149
src/internal/byteorder/byteorder.go
Normal file
@@ -0,0 +1,149 @@
|
||||
// Copyright 2024 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package byteorder provides functions for decoding and encoding
|
||||
// little and big endian integer types from/to byte slices.
|
||||
package byteorder
|
||||
|
||||
// LeUint16 decodes the first two bytes of b as a little-endian uint16.
// It panics if len(b) < 2.
func LeUint16(b []byte) uint16 {
	_ = b[1] // bounds check hint to compiler; see golang.org/issue/14808
	lo, hi := uint16(b[0]), uint16(b[1])
	return hi<<8 | lo
}
|
||||
|
||||
// LePutUint16 writes v into the first two bytes of b in little-endian
// order. It panics, before writing anything, if len(b) < 2.
func LePutUint16(b []byte, v uint16) {
	_ = b[1] // early bounds check to guarantee safety of writes below
	b[0], b[1] = byte(v), byte(v>>8)
}
|
||||
|
||||
// LeAppendUint16 appends the two little-endian bytes of v to b and
// returns the extended slice.
func LeAppendUint16(b []byte, v uint16) []byte {
	lo, hi := byte(v), byte(v>>8)
	return append(b, lo, hi)
}
|
||||
|
||||
// LeUint32 decodes the first four bytes of b as a little-endian uint32.
// It panics if len(b) < 4.
func LeUint32(b []byte) uint32 {
	_ = b[3] // bounds check hint to compiler; see golang.org/issue/14808
	v := uint32(b[0])
	v |= uint32(b[1]) << 8
	v |= uint32(b[2]) << 16
	v |= uint32(b[3]) << 24
	return v
}
|
||||
|
||||
// LePutUint32 writes v into the first four bytes of b in little-endian
// order. It panics, before writing anything, if len(b) < 4.
func LePutUint32(b []byte, v uint32) {
	_ = b[3] // early bounds check to guarantee safety of writes below
	b[0], b[1] = byte(v), byte(v>>8)
	b[2], b[3] = byte(v>>16), byte(v>>24)
}
|
||||
|
||||
// LeAppendUint32 appends the four little-endian bytes of v to b and
// returns the extended slice.
func LeAppendUint32(b []byte, v uint32) []byte {
	return append(b, byte(v), byte(v>>8), byte(v>>16), byte(v>>24))
}
|
||||
|
||||
// LeUint64 decodes the first eight bytes of b as a little-endian uint64.
// It panics if len(b) < 8.
func LeUint64(b []byte) uint64 {
	_ = b[7] // bounds check hint to compiler; see golang.org/issue/14808
	lo := uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24
	hi := uint64(b[4]) | uint64(b[5])<<8 | uint64(b[6])<<16 | uint64(b[7])<<24
	return hi<<32 | lo
}
|
||||
|
||||
// LePutUint64 writes v into the first eight bytes of b in little-endian
// order. It panics, before writing anything, if len(b) < 8.
func LePutUint64(b []byte, v uint64) {
	_ = b[7] // early bounds check to guarantee safety of writes below
	b[0], b[1] = byte(v), byte(v>>8)
	b[2], b[3] = byte(v>>16), byte(v>>24)
	b[4], b[5] = byte(v>>32), byte(v>>40)
	b[6], b[7] = byte(v>>48), byte(v>>56)
}
|
||||
|
||||
// LeAppendUint64 appends the eight little-endian bytes of v to b and
// returns the extended slice.
func LeAppendUint64(b []byte, v uint64) []byte {
	return append(b,
		byte(v), byte(v>>8), byte(v>>16), byte(v>>24),
		byte(v>>32), byte(v>>40), byte(v>>48), byte(v>>56),
	)
}
|
||||
|
||||
// BeUint16 decodes the first two bytes of b as a big-endian uint16.
// It panics if len(b) < 2.
func BeUint16(b []byte) uint16 {
	_ = b[1] // bounds check hint to compiler; see golang.org/issue/14808
	hi, lo := uint16(b[0]), uint16(b[1])
	return hi<<8 | lo
}
|
||||
|
||||
// BePutUint16 writes v into the first two bytes of b in big-endian
// order. It panics, before writing anything, if len(b) < 2.
func BePutUint16(b []byte, v uint16) {
	_ = b[1] // early bounds check to guarantee safety of writes below
	b[0], b[1] = byte(v>>8), byte(v)
}
|
||||
|
||||
// BeAppendUint16 appends the two big-endian bytes of v to b and returns
// the extended slice.
func BeAppendUint16(b []byte, v uint16) []byte {
	hi, lo := byte(v>>8), byte(v)
	return append(b, hi, lo)
}
|
||||
|
||||
// BeUint32 decodes the first four bytes of b as a big-endian uint32.
// It panics if len(b) < 4.
func BeUint32(b []byte) uint32 {
	_ = b[3] // bounds check hint to compiler; see golang.org/issue/14808
	v := uint32(b[3])
	v |= uint32(b[2]) << 8
	v |= uint32(b[1]) << 16
	v |= uint32(b[0]) << 24
	return v
}
|
||||
|
||||
// BePutUint32 writes v into the first four bytes of b in big-endian
// order. It panics, before writing anything, if len(b) < 4.
func BePutUint32(b []byte, v uint32) {
	_ = b[3] // early bounds check to guarantee safety of writes below
	b[0], b[1] = byte(v>>24), byte(v>>16)
	b[2], b[3] = byte(v>>8), byte(v)
}
|
||||
|
||||
// BeAppendUint32 appends the four big-endian bytes of v to b and returns
// the extended slice.
func BeAppendUint32(b []byte, v uint32) []byte {
	return append(b, byte(v>>24), byte(v>>16), byte(v>>8), byte(v))
}
|
||||
|
||||
// BeUint64 decodes the first eight bytes of b as a big-endian uint64.
// It panics if len(b) < 8.
func BeUint64(b []byte) uint64 {
	_ = b[7] // bounds check hint to compiler; see golang.org/issue/14808
	hi := uint64(b[3]) | uint64(b[2])<<8 | uint64(b[1])<<16 | uint64(b[0])<<24
	lo := uint64(b[7]) | uint64(b[6])<<8 | uint64(b[5])<<16 | uint64(b[4])<<24
	return hi<<32 | lo
}
|
||||
|
||||
// BePutUint64 writes v into the first eight bytes of b in big-endian
// order. It panics, before writing anything, if len(b) < 8.
func BePutUint64(b []byte, v uint64) {
	_ = b[7] // early bounds check to guarantee safety of writes below
	b[0], b[1] = byte(v>>56), byte(v>>48)
	b[2], b[3] = byte(v>>40), byte(v>>32)
	b[4], b[5] = byte(v>>24), byte(v>>16)
	b[6], b[7] = byte(v>>8), byte(v)
}
|
||||
|
||||
// BeAppendUint64 appends the eight big-endian bytes of v to b and returns
// the extended slice.
func BeAppendUint64(b []byte, v uint64) []byte {
	return append(b,
		byte(v>>56), byte(v>>48), byte(v>>40), byte(v>>32),
		byte(v>>24), byte(v>>16), byte(v>>8), byte(v),
	)
}
|
||||
72
src/internal/cfg/cfg.go
Normal file
72
src/internal/cfg/cfg.go
Normal file
@@ -0,0 +1,72 @@
|
||||
// Copyright 2019 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package cfg holds configuration shared by the Go command and internal/testenv.
|
||||
// Definitions that don't need to be exposed outside of cmd/go should be in
|
||||
// cmd/go/internal/cfg instead of this package.
|
||||
package cfg
|
||||
|
||||
// KnownEnv is a list of environment variables that affect the operation
// of the Go command.
//
// The value is a single string with one variable name per line, listed in
// byte-wise sorted order; the string begins and ends with a newline.
const KnownEnv = `
AR
CC
CGO_CFLAGS
CGO_CFLAGS_ALLOW
CGO_CFLAGS_DISALLOW
CGO_CPPFLAGS
CGO_CPPFLAGS_ALLOW
CGO_CPPFLAGS_DISALLOW
CGO_CXXFLAGS
CGO_CXXFLAGS_ALLOW
CGO_CXXFLAGS_DISALLOW
CGO_ENABLED
CGO_FFLAGS
CGO_FFLAGS_ALLOW
CGO_FFLAGS_DISALLOW
CGO_LDFLAGS
CGO_LDFLAGS_ALLOW
CGO_LDFLAGS_DISALLOW
CXX
FC
GCCGO
GO111MODULE
GO386
GOAMD64
GOARCH
GOARM
GOARM64
GOBIN
GOCACHE
GOCACHEPROG
GOENV
GOEXE
GOEXPERIMENT
GOFLAGS
GOGCCFLAGS
GOHOSTARCH
GOHOSTOS
GOINSECURE
GOMIPS
GOMIPS64
GOMODCACHE
GONOPROXY
GONOSUMDB
GOOS
GOPATH
GOPPC64
GOPRIVATE
GOPROXY
GORISCV64
GOROOT
GOSUMDB
GOTMPDIR
GOTOOLCHAIN
GOTOOLDIR
GOVCS
GOWASM
GOWORK
GO_EXTLINK_ENABLED
PKG_CONFIG
`
|
||||
160
src/internal/chacha8rand/chacha8.go
Normal file
160
src/internal/chacha8rand/chacha8.go
Normal file
@@ -0,0 +1,160 @@
|
||||
// Copyright 2023 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package chacha8rand implements a pseudorandom generator
|
||||
// based on ChaCha8. It is used by both runtime and math/rand/v2
|
||||
// and must have minimal dependencies.
|
||||
package chacha8rand
|
||||
|
||||
import "internal/byteorder"
|
||||
|
||||
// Parameters of the generator. Refill advances the block counter by
// ctrInc and compares it against ctrMax; Init64 makes chunk values
// available after each call to block.
const (
	ctrInc = 4  // increment counter by 4 between block calls
	ctrMax = 16 // reseed when counter reaches 16
	chunk  = 32 // each chunk produced by block is 32 uint64s
	reseed = 4  // reseed with 4 words
)
|
||||
|
||||
// block is the chacha8rand block function.
// It is declared here without a body, so the implementation lives outside
// this declaration (NOTE(review): presumably assembly or a generic Go
// file; confirm). Init64 calls it with the state's seed, its 32-word
// buffer and a counter of 0.
func block(seed *[4]uint64, blocks *[32]uint64, counter uint32)
|
||||
|
||||
// A State holds the state for a single random generator.
// It must be used from one goroutine at a time.
// If used by multiple goroutines at a time, the goroutines
// may see the same random values, but the code will not
// crash or cause out-of-bounds memory accesses.
type State struct {
	buf  [32]uint64 // output buffer passed to block; Next hands out its entries
	seed [4]uint64  // seed passed to block
	i    uint32     // index into buf of the value the next call to Next returns
	n    uint32     // Next reports "not available" once i reaches n
	c    uint32     // block counter; Init64 resets it to 0, Refill adds ctrInc
}
|
||||
|
||||
// Next returns the next random value, along with a boolean
|
||||
// indicating whether one was available.
|
||||
// If one is not available, the caller should call Refill
|
||||
// and then repeat the call to Next.
|
||||
//
|
||||
// Next is //go:nosplit to allow its use in the runtime
|
||||
// with per-m data without holding the per-m lock.
|
||||
//
|
||||
//go:nosplit
|
||||
func (s *State) Next() (uint64, bool) {
|
||||
i := s.i
|
||||
if i >= s.n {
|
||||
return 0, false
|
||||
}
|
||||
s.i = i + 1
|
||||
return s.buf[i&31], true // i&31 eliminates bounds check
|
||||
}
|
||||
|
||||
// Init seeds the State with the given seed value.
|
||||
func (s *State) Init(seed [32]byte) {
|
||||
s.Init64([4]uint64{
|
||||
byteorder.LeUint64(seed[0*8:]),
|
||||
byteorder.LeUint64(seed[1*8:]),
|
||||
byteorder.LeUint64(seed[2*8:]),
|
||||
byteorder.LeUint64(seed[3*8:]),
|
||||
})
|
||||
}
|
||||
|
||||
// Init64 seeds the state with the given seed value.
|
||||
func (s *State) Init64(seed [4]uint64) {
|
||||
s.seed = seed
|
||||
block(&s.seed, &s.buf, 0)
|
||||
s.c = 0
|
||||
s.i = 0
|
||||
s.n = chunk
|
||||
}
|
||||
|
||||
// Refill refills the state with more random values.
|
||||
// After a call to Refill, an immediate call to Next will succeed
|
||||
// (unless multiple goroutines are incorrectly sharing a state).
|
||||
func (s *State) Refill() {
|
||||
s.c += ctrInc
|
||||
if s.c == ctrMax {
|
||||
// Reseed with generated uint64s for forward secrecy.
|
||||
// Normally this is done immediately after computing a block,
|
||||
// but we do it immediately before computing the next block,
|
||||
// to allow a much smaller serialized state (just the seed plus offset).
|
||||
// This gives a delayed benefit for the forward secrecy
|
||||
// (you can reconstruct the recent past given a memory dump),
|
||||
// which we deem acceptable in exchange for the reduced size.
|
||||
s.seed[0] = s.buf[len(s.buf)-reseed+0]
|
||||
s.seed[1] = s.buf[len(s.buf)-reseed+1]
|
||||
s.seed[2] = s.buf[len(s.buf)-reseed+2]
|
||||
s.seed[3] = s.buf[len(s.buf)-reseed+3]
|
||||
s.c = 0
|
||||
}
|
||||
block(&s.seed, &s.buf, s.c)
|
||||
s.i = 0
|
||||
s.n = uint32(len(s.buf))
|
||||
if s.c == ctrMax-ctrInc {
|
||||
s.n = uint32(len(s.buf)) - reseed
|
||||
}
|
||||
}
|
||||
|
||||
// Reseed reseeds the state with new random values.
|
||||
// After a call to Reseed, any previously returned random values
|
||||
// have been erased from the memory of the state and cannot be
|
||||
// recovered.
|
||||
func (s *State) Reseed() {
|
||||
var seed [4]uint64
|
||||
for i := range seed {
|
||||
for {
|
||||
x, ok := s.Next()
|
||||
if ok {
|
||||
seed[i] = x
|
||||
break
|
||||
}
|
||||
s.Refill()
|
||||
}
|
||||
}
|
||||
s.Init64(seed)
|
||||
}
|
||||
|
||||
// Marshal marshals the state into a byte slice.
|
||||
// Marshal and Unmarshal are functions, not methods,
|
||||
// so that they will not be linked into the runtime
|
||||
// when it uses the State struct, since the runtime
|
||||
// does not need these.
|
||||
func Marshal(s *State) []byte {
|
||||
data := make([]byte, 6*8)
|
||||
copy(data, "chacha8:")
|
||||
used := (s.c/ctrInc)*chunk + s.i
|
||||
byteorder.BePutUint64(data[1*8:], uint64(used))
|
||||
for i, seed := range s.seed {
|
||||
byteorder.LePutUint64(data[(2+i)*8:], seed)
|
||||
}
|
||||
return data
|
||||
}
|
||||
|
||||
// errUnmarshalChaCha8 is the error returned by Unmarshal
// when the input is not a valid encoding.
type errUnmarshalChaCha8 struct{}

// Error implements the error interface.
func (e *errUnmarshalChaCha8) Error() string {
	const msg = "invalid ChaCha8 encoding"
	return msg
}
|
||||
|
||||
// Unmarshal unmarshals the state from a byte slice.
|
||||
func Unmarshal(s *State, data []byte) error {
|
||||
if len(data) != 6*8 || string(data[:8]) != "chacha8:" {
|
||||
return new(errUnmarshalChaCha8)
|
||||
}
|
||||
used := byteorder.BeUint64(data[1*8:])
|
||||
if used > (ctrMax/ctrInc)*chunk-reseed {
|
||||
return new(errUnmarshalChaCha8)
|
||||
}
|
||||
for i := range s.seed {
|
||||
s.seed[i] = byteorder.LeUint64(data[(2+i)*8:])
|
||||
}
|
||||
s.c = ctrInc * (uint32(used) / chunk)
|
||||
block(&s.seed, &s.buf, s.c)
|
||||
s.i = uint32(used) % chunk
|
||||
s.n = chunk
|
||||
if s.c == ctrMax-ctrInc {
|
||||
s.n = chunk - reseed
|
||||
}
|
||||
return nil
|
||||
}
|
||||
174
src/internal/chacha8rand/chacha8_amd64.s
Normal file
174
src/internal/chacha8rand/chacha8_amd64.s
Normal file
@@ -0,0 +1,174 @@
|
||||
// Copyright 2023 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
// ChaCha8 is ChaCha with 8 rounds.
|
||||
// See https://cr.yp.to/chacha/chacha-20080128.pdf.
|
||||
// See chacha8_generic.go for additional details.
|
||||
|
||||
// ROL rotates the uint32s in register R left by N bits, using temporary T.
|
||||
#define ROL(N, R, T) \
|
||||
MOVO R, T; PSLLL $(N), T; PSRLL $(32-(N)), R; PXOR T, R
|
||||
|
||||
// ROL16 rotates the uint32s in register R left by 16, using temporary T if needed.
|
||||
#ifdef GOAMD64_v2
|
||||
#define ROL16(R, T) PSHUFB ·rol16<>(SB), R
|
||||
#else
|
||||
#define ROL16(R, T) ROL(16, R, T)
|
||||
#endif
|
||||
|
||||
// ROL8 rotates the uint32s in register R left by 8, using temporary T if needed.
|
||||
#ifdef GOAMD64_v2
|
||||
#define ROL8(R, T) PSHUFB ·rol8<>(SB), R
|
||||
#else
|
||||
#define ROL8(R, T) ROL(8, R, T)
|
||||
#endif
|
||||
|
||||
// QR is the ChaCha quarter-round on A, B, C, and D. T is an available temporary.
|
||||
#define QR(A, B, C, D, T) \
|
||||
PADDD B, A; PXOR A, D; ROL16(D, T); \
|
||||
PADDD D, C; PXOR C, B; MOVO B, T; PSLLL $12, T; PSRLL $20, B; PXOR T, B; \
|
||||
PADDD B, A; PXOR A, D; ROL8(D, T); \
|
||||
PADDD D, C; PXOR C, B; MOVO B, T; PSLLL $7, T; PSRLL $25, B; PXOR T, B
|
||||
|
||||
// REPLREG replicates the register R into 4 uint32s in XR.
|
||||
#define REPLREG(R, XR) \
|
||||
MOVQ R, XR; \
|
||||
PSHUFD $0, XR, XR
|
||||
|
||||
// REPL replicates the uint32 constant val into 4 uint32s in XR. It smashes DX.
|
||||
#define REPL(val, XR) \
|
||||
MOVL $val, DX; \
|
||||
REPLREG(DX, XR)
|
||||
|
||||
// SEED copies the off'th uint32 of the seed into the register XR,
|
||||
// replicating it into all four stripes of the register.
|
||||
#define SEED(off, reg, XR) \
|
||||
MOVL (4*off)(AX), reg; \
|
||||
REPLREG(reg, XR) \
|
||||
|
||||
// block runs 4 ChaCha8 block transformations in the four stripes of the X registers.
|
||||
|
||||
// func block(seed *[8]uint32, blocks *[16][4]uint32, counter uint32)
|
||||
TEXT ·block<ABIInternal>(SB), NOSPLIT, $16
|
||||
// seed in AX
|
||||
// blocks in BX
|
||||
// counter in CX
|
||||
|
||||
// Load initial constants into top row.
|
||||
REPL(0x61707865, X0)
|
||||
REPL(0x3320646e, X1)
|
||||
REPL(0x79622d32, X2)
|
||||
REPL(0x6b206574, X3)
|
||||
|
||||
// Load counter into bottom left cell.
|
||||
// Each stripe gets a different counter: 0, 1, 2, 3.
|
||||
// (PINSRD is not available in GOAMD64_v1,
|
||||
// so just do it in memory on all systems.
|
||||
// This is not on the critical path.)
|
||||
MOVL CX, 0(SP)
|
||||
INCL CX
|
||||
MOVL CX, 4(SP)
|
||||
INCL CX
|
||||
MOVL CX, 8(SP)
|
||||
INCL CX
|
||||
MOVL CX, 12(SP)
|
||||
MOVOU 0(SP), X12
|
||||
|
||||
// Load seed words into next two rows and into DI, SI, R8..R13
|
||||
SEED(0, DI, X4)
|
||||
SEED(1, SI, X5)
|
||||
SEED(2, R8, X6)
|
||||
SEED(3, R9, X7)
|
||||
SEED(4, R10, X8)
|
||||
SEED(5, R11, X9)
|
||||
SEED(6, R12, X10)
|
||||
SEED(7, R13, X11)
|
||||
|
||||
// Zeros for remaining two matrix entries.
|
||||
// We have just enough XMM registers to hold the state,
|
||||
// without one for the temporary, so we flush and restore
|
||||
// some values to and from memory to provide a temporary.
|
||||
// The initial temporary is X15, so zero its memory instead
|
||||
// of X15 itself.
|
||||
MOVL $0, DX
|
||||
MOVQ DX, X13
|
||||
MOVQ DX, X14
|
||||
MOVOU X14, (15*16)(BX)
|
||||
|
||||
// 4 iterations. Each iteration is 8 quarter-rounds.
|
||||
MOVL $4, DX
|
||||
loop:
|
||||
QR(X0, X4, X8, X12, X15)
|
||||
MOVOU X4, (4*16)(BX) // save X4
|
||||
QR(X1, X5, X9, X13, X15)
|
||||
MOVOU (15*16)(BX), X15 // reload X15; temp now X4
|
||||
QR(X2, X6, X10, X14, X4)
|
||||
QR(X3, X7, X11, X15, X4)
|
||||
|
||||
QR(X0, X5, X10, X15, X4)
|
||||
MOVOU X15, (15*16)(BX) // save X15
|
||||
QR(X1, X6, X11, X12, X4)
|
||||
MOVOU (4*16)(BX), X4 // reload X4; temp now X15
|
||||
QR(X2, X7, X8, X13, X15)
|
||||
QR(X3, X4, X9, X14, X15)
|
||||
|
||||
DECL DX
|
||||
JNZ loop
|
||||
|
||||
// Store interlaced blocks back to output buffer,
|
||||
// adding original seed along the way.
|
||||
|
||||
// First the top and bottom rows.
|
||||
MOVOU X0, (0*16)(BX)
|
||||
MOVOU X1, (1*16)(BX)
|
||||
MOVOU X2, (2*16)(BX)
|
||||
MOVOU X3, (3*16)(BX)
|
||||
MOVOU X12, (12*16)(BX)
|
||||
MOVOU X13, (13*16)(BX)
|
||||
MOVOU X14, (14*16)(BX)
|
||||
// X15 has already been stored.
|
||||
|
||||
// Now we have X0-X3, X12-X15 available for temporaries.
|
||||
// Add seed rows back to output. We left seed in DI, SI, R8..R13 above.
|
||||
REPLREG(DI, X0)
|
||||
REPLREG(SI, X1)
|
||||
REPLREG(R8, X2)
|
||||
REPLREG(R9, X3)
|
||||
REPLREG(R10, X12)
|
||||
REPLREG(R11, X13)
|
||||
REPLREG(R12, X14)
|
||||
REPLREG(R13, X15)
|
||||
PADDD X0, X4
|
||||
PADDD X1, X5
|
||||
PADDD X2, X6
|
||||
PADDD X3, X7
|
||||
PADDD X12, X8
|
||||
PADDD X13, X9
|
||||
PADDD X14, X10
|
||||
PADDD X15, X11
|
||||
MOVOU X4, (4*16)(BX)
|
||||
MOVOU X5, (5*16)(BX)
|
||||
MOVOU X6, (6*16)(BX)
|
||||
MOVOU X7, (7*16)(BX)
|
||||
MOVOU X8, (8*16)(BX)
|
||||
MOVOU X9, (9*16)(BX)
|
||||
MOVOU X10, (10*16)(BX)
|
||||
MOVOU X11, (11*16)(BX)
|
||||
|
||||
MOVL $0, AX
|
||||
MOVQ AX, X15 // must be 0 on return
|
||||
|
||||
RET
|
||||
|
||||
// rotate left 16 indexes for PSHUFB
|
||||
GLOBL ·rol16<>(SB), NOPTR|RODATA, $16
|
||||
DATA ·rol16<>+0(SB)/8, $0x0504070601000302
|
||||
DATA ·rol16<>+8(SB)/8, $0x0D0C0F0E09080B0A
|
||||
|
||||
// rotate left 8 indexes for PSHUFB
|
||||
GLOBL ·rol8<>(SB), NOPTR|RODATA, $16
|
||||
DATA ·rol8<>+0(SB)/8, $0x0605040702010003
|
||||
DATA ·rol8<>+8(SB)/8, $0x0E0D0C0F0A09080B
|
||||
104
src/internal/chacha8rand/chacha8_arm64.s
Normal file
104
src/internal/chacha8rand/chacha8_arm64.s
Normal file
@@ -0,0 +1,104 @@
|
||||
// Copyright 2023 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
// QR is the ChaCha quarter-round on A, B, C, and D.
|
||||
// V30 is used as a temporary, and V31 is assumed to
|
||||
// hold the index table for rotate left 8.
|
||||
#define QR(A, B, C, D) \
|
||||
VADD A.S4, B.S4, A.S4; VEOR D.B16, A.B16, D.B16; VREV32 D.H8, D.H8; \
|
||||
VADD C.S4, D.S4, C.S4; VEOR B.B16, C.B16, V30.B16; VSHL $12, V30.S4, B.S4; VSRI $20, V30.S4, B.S4 \
|
||||
VADD A.S4, B.S4, A.S4; VEOR D.B16, A.B16, D.B16; VTBL V31.B16, [D.B16], D.B16; \
|
||||
VADD C.S4, D.S4, C.S4; VEOR B.B16, C.B16, V30.B16; VSHL $7, V30.S4, B.S4; VSRI $25, V30.S4, B.S4
|
||||
|
||||
// block runs 4 ChaCha8 block transformations in the four stripes of the V registers.
|
||||
|
||||
// func block(seed *[8]uint32, blocks *[16][4]uint32, counter uint32)
|
||||
TEXT ·block<ABIInternal>(SB), NOSPLIT, $16
|
||||
// seed in R0
|
||||
// blocks in R1
|
||||
// counter in R2
|
||||
|
||||
// Load initial constants into top row.
|
||||
MOVD $·chachaConst(SB), R10
|
||||
VLD4R (R10), [V0.S4, V1.S4, V2.S4, V3.S4]
|
||||
|
||||
// Load increment and rotate 8 constants into V30, V31.
|
||||
MOVD $·chachaIncRot(SB), R11
|
||||
VLD1 (R11), [V30.S4, V31.S4]
|
||||
|
||||
VLD4R.P 16(R0), [V4.S4, V5.S4, V6.S4, V7.S4]
|
||||
VLD4R.P 16(R0), [V8.S4, V9.S4, V10.S4, V11.S4]
|
||||
|
||||
// Store counter to memory so it can be loaded replicated into all four uint32 lanes.
|
||||
MOVW R2, 0(RSP)
|
||||
VLD1R 0(RSP), [V12.S4]
|
||||
|
||||
// Add 0, 1, 2, 3 to counter stripes.
|
||||
VADD V30.S4, V12.S4, V12.S4
|
||||
|
||||
// Zeros for remaining two matrix entries.
|
||||
VEOR V13.B16, V13.B16, V13.B16
|
||||
VEOR V14.B16, V14.B16, V14.B16
|
||||
VEOR V15.B16, V15.B16, V15.B16
|
||||
|
||||
// Save seed state for adding back later.
|
||||
VMOV V4.B16, V20.B16
|
||||
VMOV V5.B16, V21.B16
|
||||
VMOV V6.B16, V22.B16
|
||||
VMOV V7.B16, V23.B16
|
||||
VMOV V8.B16, V24.B16
|
||||
VMOV V9.B16, V25.B16
|
||||
VMOV V10.B16, V26.B16
|
||||
VMOV V11.B16, V27.B16
|
||||
|
||||
// 4 iterations. Each iteration is 8 quarter-rounds.
|
||||
MOVD $4, R0
|
||||
loop:
|
||||
QR(V0, V4, V8, V12)
|
||||
QR(V1, V5, V9, V13)
|
||||
QR(V2, V6, V10, V14)
|
||||
QR(V3, V7, V11, V15)
|
||||
|
||||
QR(V0, V5, V10, V15)
|
||||
QR(V1, V6, V11, V12)
|
||||
QR(V2, V7, V8, V13)
|
||||
QR(V3, V4, V9, V14)
|
||||
|
||||
SUB $1, R0
|
||||
CBNZ R0, loop
|
||||
|
||||
// Add seed back.
|
||||
VADD V4.S4, V20.S4, V4.S4
|
||||
VADD V5.S4, V21.S4, V5.S4
|
||||
VADD V6.S4, V22.S4, V6.S4
|
||||
VADD V7.S4, V23.S4, V7.S4
|
||||
VADD V8.S4, V24.S4, V8.S4
|
||||
VADD V9.S4, V25.S4, V9.S4
|
||||
VADD V10.S4, V26.S4, V10.S4
|
||||
VADD V11.S4, V27.S4, V11.S4
|
||||
|
||||
// Store interlaced blocks back to output buffer.
|
||||
VST1.P [ V0.B16, V1.B16, V2.B16, V3.B16], 64(R1)
|
||||
VST1.P [ V4.B16, V5.B16, V6.B16, V7.B16], 64(R1)
|
||||
VST1.P [ V8.B16, V9.B16, V10.B16, V11.B16], 64(R1)
|
||||
VST1.P [V12.B16, V13.B16, V14.B16, V15.B16], 64(R1)
|
||||
RET
|
||||
|
||||
GLOBL ·chachaConst(SB), NOPTR|RODATA, $32
|
||||
DATA ·chachaConst+0x00(SB)/4, $0x61707865
|
||||
DATA ·chachaConst+0x04(SB)/4, $0x3320646e
|
||||
DATA ·chachaConst+0x08(SB)/4, $0x79622d32
|
||||
DATA ·chachaConst+0x0c(SB)/4, $0x6b206574
|
||||
|
||||
GLOBL ·chachaIncRot(SB), NOPTR|RODATA, $32
|
||||
DATA ·chachaIncRot+0x00(SB)/4, $0x00000000
|
||||
DATA ·chachaIncRot+0x04(SB)/4, $0x00000001
|
||||
DATA ·chachaIncRot+0x08(SB)/4, $0x00000002
|
||||
DATA ·chachaIncRot+0x0c(SB)/4, $0x00000003
|
||||
DATA ·chachaIncRot+0x10(SB)/4, $0x02010003
|
||||
DATA ·chachaIncRot+0x14(SB)/4, $0x06050407
|
||||
DATA ·chachaIncRot+0x18(SB)/4, $0x0A09080B
|
||||
DATA ·chachaIncRot+0x1c(SB)/4, $0x0E0D0C0F
|
||||
235
src/internal/chacha8rand/chacha8_generic.go
Normal file
235
src/internal/chacha8rand/chacha8_generic.go
Normal file
@@ -0,0 +1,235 @@
|
||||
// Copyright 2023 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// ChaCha8 is ChaCha with 8 rounds.
|
||||
// See https://cr.yp.to/chacha/chacha-20080128.pdf.
|
||||
//
|
||||
// ChaCha8 operates on a 4x4 matrix of uint32 values, initially set to:
|
||||
//
|
||||
// const1 const2 const3 const4
|
||||
// seed seed seed seed
|
||||
// seed seed seed seed
|
||||
// counter64 0 0
|
||||
//
|
||||
// We use the same constants as ChaCha20 does, a random seed,
|
||||
// and a counter. Running ChaCha8 on this input produces
|
||||
// a 4x4 matrix of pseudo-random values with as much entropy
|
||||
// as the seed.
|
||||
//
|
||||
// Given SIMD registers that can hold N uint32s, it is possible
|
||||
// to run N ChaCha8 block transformations in parallel by filling
|
||||
// the first register with the N copies of const1, the second
|
||||
// with N copies of const2, and so on, and then running the operations.
|
||||
//
|
||||
// Each iteration of ChaCha8Rand operates over 32 bytes of input and
|
||||
// produces 992 bytes of RNG output, plus 32 bytes of input for the next
|
||||
// iteration.
|
||||
//
|
||||
// The 32 bytes of input are used as a ChaCha8 key, with a zero nonce, to
|
||||
// produce 1024 bytes of output (16 blocks, with counters 0 to 15).
|
||||
// First, for each block, the values 0x61707865, 0x3320646e, 0x79622d32,
|
||||
// 0x6b206574 are subtracted from the 32-bit little-endian words at
|
||||
// position 0, 1, 2, and 3 respectively, and an increasing counter
|
||||
// starting at zero is subtracted from each word at position 12. Then,
|
||||
// this stream is permuted such that for each sequence of four blocks,
|
||||
// first we output the first four bytes of each block, then the next four
|
||||
// bytes of each block, and so on. Finally, the last 32 bytes of output
|
||||
// are used as the input of the next iteration, and the remaining 992
|
||||
// bytes are the RNG output.
|
||||
//
|
||||
// See https://c2sp.org/chacha8rand for additional details.
|
||||
//
|
||||
// Normal ChaCha20 implementations for encryption use this same
|
||||
// parallelism but then have to deinterlace the results so that
|
||||
// it appears the blocks were generated separately. For the purposes
|
||||
// of generating random numbers, the interlacing is fine.
|
||||
// We are simply locked in to preserving the 4-way interlacing
|
||||
// in any future optimizations.
|
||||
package chacha8rand
|
||||
|
||||
import (
|
||||
"internal/goarch"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
// setup sets up 4 ChaCha8 blocks in b32 with the counter and seed.
|
||||
// Note that b32 is [16][4]uint32 not [4][16]uint32: the blocks are interlaced
|
||||
// the same way they would be in a 4-way SIMD implementations.
|
||||
func setup(seed *[4]uint64, b32 *[16][4]uint32, counter uint32) {
|
||||
// Convert to uint64 to do half as many stores to memory.
|
||||
b := (*[16][2]uint64)(unsafe.Pointer(b32))
|
||||
|
||||
// Constants; same as in ChaCha20: "expand 32-byte k"
|
||||
b[0][0] = 0x61707865_61707865
|
||||
b[0][1] = 0x61707865_61707865
|
||||
|
||||
b[1][0] = 0x3320646e_3320646e
|
||||
b[1][1] = 0x3320646e_3320646e
|
||||
|
||||
b[2][0] = 0x79622d32_79622d32
|
||||
b[2][1] = 0x79622d32_79622d32
|
||||
|
||||
b[3][0] = 0x6b206574_6b206574
|
||||
b[3][1] = 0x6b206574_6b206574
|
||||
|
||||
// Seed values.
|
||||
var x64 uint64
|
||||
var x uint32
|
||||
|
||||
x = uint32(seed[0])
|
||||
x64 = uint64(x)<<32 | uint64(x)
|
||||
b[4][0] = x64
|
||||
b[4][1] = x64
|
||||
|
||||
x = uint32(seed[0] >> 32)
|
||||
x64 = uint64(x)<<32 | uint64(x)
|
||||
b[5][0] = x64
|
||||
b[5][1] = x64
|
||||
|
||||
x = uint32(seed[1])
|
||||
x64 = uint64(x)<<32 | uint64(x)
|
||||
b[6][0] = x64
|
||||
b[6][1] = x64
|
||||
|
||||
x = uint32(seed[1] >> 32)
|
||||
x64 = uint64(x)<<32 | uint64(x)
|
||||
b[7][0] = x64
|
||||
b[7][1] = x64
|
||||
|
||||
x = uint32(seed[2])
|
||||
x64 = uint64(x)<<32 | uint64(x)
|
||||
b[8][0] = x64
|
||||
b[8][1] = x64
|
||||
|
||||
x = uint32(seed[2] >> 32)
|
||||
x64 = uint64(x)<<32 | uint64(x)
|
||||
b[9][0] = x64
|
||||
b[9][1] = x64
|
||||
|
||||
x = uint32(seed[3])
|
||||
x64 = uint64(x)<<32 | uint64(x)
|
||||
b[10][0] = x64
|
||||
b[10][1] = x64
|
||||
|
||||
x = uint32(seed[3] >> 32)
|
||||
x64 = uint64(x)<<32 | uint64(x)
|
||||
b[11][0] = x64
|
||||
b[11][1] = x64
|
||||
|
||||
// Counters.
|
||||
if goarch.BigEndian {
|
||||
b[12][0] = uint64(counter+0)<<32 | uint64(counter+1)
|
||||
b[12][1] = uint64(counter+2)<<32 | uint64(counter+3)
|
||||
} else {
|
||||
b[12][0] = uint64(counter+0) | uint64(counter+1)<<32
|
||||
b[12][1] = uint64(counter+2) | uint64(counter+3)<<32
|
||||
}
|
||||
|
||||
// Zeros.
|
||||
b[13][0] = 0
|
||||
b[13][1] = 0
|
||||
b[14][0] = 0
|
||||
b[14][1] = 0
|
||||
|
||||
b[15][0] = 0
|
||||
b[15][1] = 0
|
||||
}
|
||||
|
||||
func _() {
|
||||
// block and block_generic must have same type
|
||||
x := block
|
||||
x = block_generic
|
||||
_ = x
|
||||
}
|
||||
|
||||
// block_generic is the non-assembly block implementation,
|
||||
// for use on systems without special assembly.
|
||||
// Even on such systems, it is quite fast: on GOOS=386,
|
||||
// ChaCha8 using this code generates random values faster than PCG-DXSM.
|
||||
func block_generic(seed *[4]uint64, buf *[32]uint64, counter uint32) {
|
||||
b := (*[16][4]uint32)(unsafe.Pointer(buf))
|
||||
|
||||
setup(seed, b, counter)
|
||||
|
||||
for i := range b[0] {
|
||||
// Load block i from b[*][i] into local variables.
|
||||
b0 := b[0][i]
|
||||
b1 := b[1][i]
|
||||
b2 := b[2][i]
|
||||
b3 := b[3][i]
|
||||
b4 := b[4][i]
|
||||
b5 := b[5][i]
|
||||
b6 := b[6][i]
|
||||
b7 := b[7][i]
|
||||
b8 := b[8][i]
|
||||
b9 := b[9][i]
|
||||
b10 := b[10][i]
|
||||
b11 := b[11][i]
|
||||
b12 := b[12][i]
|
||||
b13 := b[13][i]
|
||||
b14 := b[14][i]
|
||||
b15 := b[15][i]
|
||||
|
||||
// 4 iterations of eight quarter-rounds each is 8 rounds
|
||||
for round := 0; round < 4; round++ {
|
||||
b0, b4, b8, b12 = qr(b0, b4, b8, b12)
|
||||
b1, b5, b9, b13 = qr(b1, b5, b9, b13)
|
||||
b2, b6, b10, b14 = qr(b2, b6, b10, b14)
|
||||
b3, b7, b11, b15 = qr(b3, b7, b11, b15)
|
||||
|
||||
b0, b5, b10, b15 = qr(b0, b5, b10, b15)
|
||||
b1, b6, b11, b12 = qr(b1, b6, b11, b12)
|
||||
b2, b7, b8, b13 = qr(b2, b7, b8, b13)
|
||||
b3, b4, b9, b14 = qr(b3, b4, b9, b14)
|
||||
}
|
||||
|
||||
// Store block i back into b[*][i].
|
||||
// Add b4..b11 back to the original key material,
|
||||
// like in ChaCha20, to avoid trivial invertibility.
|
||||
// There is no entropy in b0..b3 and b12..b15
|
||||
// so we can skip the additions and save some time.
|
||||
b[0][i] = b0
|
||||
b[1][i] = b1
|
||||
b[2][i] = b2
|
||||
b[3][i] = b3
|
||||
b[4][i] += b4
|
||||
b[5][i] += b5
|
||||
b[6][i] += b6
|
||||
b[7][i] += b7
|
||||
b[8][i] += b8
|
||||
b[9][i] += b9
|
||||
b[10][i] += b10
|
||||
b[11][i] += b11
|
||||
b[12][i] = b12
|
||||
b[13][i] = b13
|
||||
b[14][i] = b14
|
||||
b[15][i] = b15
|
||||
}
|
||||
|
||||
if goarch.BigEndian {
|
||||
// On a big-endian system, reading the uint32 pairs as uint64s
|
||||
// will word-swap them compared to little-endian, so we word-swap
|
||||
// them here first to make the next swap get the right answer.
|
||||
for i, x := range buf {
|
||||
buf[i] = x>>32 | x<<32
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// qr is the (inlinable) ChaCha8 quarter round.
// It mixes the four words with add, xor, and left rotations
// by 16, 12, 8, and 7 bits, and returns the updated words.
func qr(a, b, c, d uint32) (_a, _b, _c, _d uint32) {
	var t uint32

	a += b
	t = d ^ a
	d = t<<16 | t>>16

	c += d
	t = b ^ c
	b = t<<12 | t>>20

	a += b
	t = d ^ a
	d = t<<8 | t>>24

	c += d
	t = b ^ c
	b = t<<7 | t>>25

	return a, b, c, d
}
|
||||
12
src/internal/chacha8rand/chacha8_stub.s
Normal file
12
src/internal/chacha8rand/chacha8_stub.s
Normal file
@@ -0,0 +1,12 @@
|
||||
// Copyright 2023 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build !amd64 && !arm64
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
// func block(seed *[4]uint64, blocks *[32]uint64, counter uint32)
|
||||
TEXT ·block(SB), NOSPLIT, $0
|
||||
JMP ·block_generic(SB)
|
||||
|
||||
12
src/internal/chacha8rand/export_test.go
Normal file
12
src/internal/chacha8rand/export_test.go
Normal file
@@ -0,0 +1,12 @@
|
||||
// Copyright 2023 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package chacha8rand
|
||||
|
||||
var Block = block
|
||||
var Block_generic = block_generic
|
||||
|
||||
func Seed(s *State) [4]uint64 {
|
||||
return s.seed
|
||||
}
|
||||
202
src/internal/chacha8rand/rand_test.go
Normal file
202
src/internal/chacha8rand/rand_test.go
Normal file
@@ -0,0 +1,202 @@
|
||||
// Copyright 2023 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package chacha8rand_test
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
. "internal/chacha8rand"
|
||||
"slices"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestOutput(t *testing.T) {
|
||||
var s State
|
||||
s.Init(seed)
|
||||
for i := range output {
|
||||
for {
|
||||
x, ok := s.Next()
|
||||
if ok {
|
||||
if x != output[i] {
|
||||
t.Errorf("#%d: have %#x want %#x", i, x, output[i])
|
||||
}
|
||||
break
|
||||
}
|
||||
s.Refill()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestMarshal(t *testing.T) {
|
||||
var s State
|
||||
s.Init(seed)
|
||||
for i := range output {
|
||||
for {
|
||||
b := Marshal(&s)
|
||||
s = State{}
|
||||
err := Unmarshal(&s, b)
|
||||
if err != nil {
|
||||
t.Fatalf("#%d: Unmarshal: %v", i, err)
|
||||
}
|
||||
x, ok := s.Next()
|
||||
if ok {
|
||||
if x != output[i] {
|
||||
t.Fatalf("#%d: have %#x want %#x", i, x, output[i])
|
||||
}
|
||||
break
|
||||
}
|
||||
s.Refill()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestReseed(t *testing.T) {
|
||||
var s State
|
||||
s.Init(seed)
|
||||
old := Seed(&s)
|
||||
s.Reseed()
|
||||
if Seed(&s) == old {
|
||||
t.Errorf("Reseed did not change seed")
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkBlock(b *testing.B) {
|
||||
var seed [4]uint64
|
||||
var blocks [32]uint64
|
||||
|
||||
for i := 0; i < b.N; i++ {
|
||||
Block(&seed, &blocks, 0)
|
||||
}
|
||||
b.SetBytes(32 * 8)
|
||||
}
|
||||
|
||||
func TestBlockGeneric(t *testing.T) {
|
||||
var b1, b2 [32]uint64
|
||||
s := seed // byte seed
|
||||
seed := [4]uint64{
|
||||
binary.LittleEndian.Uint64(s[0*8:]),
|
||||
binary.LittleEndian.Uint64(s[1*8:]),
|
||||
binary.LittleEndian.Uint64(s[2*8:]),
|
||||
binary.LittleEndian.Uint64(s[3*8:]),
|
||||
}
|
||||
|
||||
Block(&seed, &b1, 4)
|
||||
Block_generic(&seed, &b2, 4)
|
||||
if !slices.Equal(b1[:], b2[:]) {
|
||||
var out bytes.Buffer
|
||||
fmt.Fprintf(&out, "%-18s %-18s\n", "block", "block_generic")
|
||||
for i := range b1 {
|
||||
suffix := ""
|
||||
if b1[i] != b2[i] {
|
||||
suffix = " mismatch!"
|
||||
}
|
||||
fmt.Fprintf(&out, "%#016x %#016x%s\n", b1[i], b2[i], suffix)
|
||||
}
|
||||
t.Errorf("block and block_generic disagree:\n%s", out.String())
|
||||
}
|
||||
}
|
||||
|
||||
// Golden test vectors. The expected output must never change, so that
// the use of this algorithm in math/rand/v2 stays stable.
// See https://c2sp.org/chacha8rand.
var (
	// seed is the fixed 32-byte seed the tests initialize their State with.
	seed = [32]byte([]byte("ABCDEFGHIJKLMNOPQRSTUVWXYZ123456"))

	// output holds the values the tests expect Next to return, in order,
	// for a State initialized with seed.
	output = []uint64{
		0xb773b6063d4616a5, 0x1160af22a66abc3c, 0x8c2599d9418d287c, 0x7ee07e037edc5cd6,
		0xcfaa9ee02d1c16ad, 0x0e090eef8febea79, 0x3c82d271128b5b3e, 0x9c5addc11252a34f,
		0xdf79bb617d6ceea6, 0x36d553591f9d736a, 0xeef0d14e181ee01f, 0x089bfc760ae58436,
		0xd9e52b59cc2ad268, 0xeb2fb4444b1b8aba, 0x4f95c8a692c46661, 0xc3c6323217cae62c,
		0x91ebb4367f4e2e7e, 0x784cf2c6a0ec9bc6, 0x5c34ec5c34eabe20, 0x4f0a8f515570daa8,
		0xfc35dcb4113d6bf2, 0x5b0da44c645554bc, 0x6d963da3db21d9e1, 0xeeaefc3150e500f3,
		0x2d37923dda3750a5, 0x380d7a626d4bc8b0, 0xeeaf68ede3d7ee49, 0xf4356695883b717c,
		0x846a9021392495a4, 0x8e8510549630a61b, 0x18dc02545dbae493, 0x0f8f9ff0a65a3d43,
		0xccf065f7190ff080, 0xfd76d1aa39673330, 0x95d232936cba6433, 0x6c7456d1070cbd17,
		0x462acfdaff8c6562, 0x5bafab866d34fc6a, 0x0c862f78030a2988, 0xd39a83e407c3163d,
		0xc00a2b7b45f22ebf, 0x564307c62466b1a9, 0x257e0424b0c072d4, 0x6fb55e99496c28fe,
		0xae9873a88f5cd4e0, 0x4657362ac60d3773, 0x1c83f91ecdf23e8e, 0x6fdc0792c15387c0,
		0x36dad2a30dfd2b5c, 0xa4b593290595bdb7, 0x4de18934e4cc02c5, 0xcdc0d604f015e3a7,
		0xfba0dbf69ad80321, 0x60e8bea3d139de87, 0xd18a4d851ef48756, 0x6366447c2215f34a,
		0x05682e97d3d007ee, 0x4c0e8978c6d54ab2, 0xcf1e9f6a6712edc2, 0x061439414c80cfd3,
		0xd1a8b6e2745c0ead, 0x31a7918d45c410e8, 0xabcc61ad90216eec, 0x4040d92d2032a71a,
		0x3cd2f66ffb40cd68, 0xdcd051c07295857a, 0xeab55cbcd9ab527e, 0x18471dce781bdaac,
		0xf7f08cd144dc7252, 0x5804e0b13d7f40d1, 0x5cb1a446e4b2d35b, 0xe6d4a728d2138a06,
		0x05223e40ca60dad8, 0x2d61ec3206ac6a68, 0xab692356874c17b8, 0xc30954417676de1c,
		0x4f1ace3732225624, 0xfba9510813988338, 0x997f200f52752e11, 0x1116aaafe86221fa,
		0x07ce3b5cb2a13519, 0x2956bc72bc458314, 0x4188b7926140eb78, 0x56ca6dbfd4adea4d,
		0x7fe3c22349340ce5, 0x35c08f9c37675f8a, 0x11e1c7fbef5ed521, 0x98adc8464ec1bc75,
		0xd163b2c73d1203f8, 0x8c761ee043a2f3f3, 0x24b99d6accecd7b7, 0x793e31aa112f0370,
		0x8e87dc2a19285139, 0x4247ae04f7096e25, 0x514f3122926fe20f, 0xdc6fb3f045d2a7e9,
		0x15cb30cecdd18eba, 0xcbc7fdecf6900274, 0x3fb5c696dc8ba021, 0xd1664417c8d274e6,
		0x05f7e445ea457278, 0xf920bbca1b9db657, 0x0c1950b4da22cb99, 0xf875baf1af09e292,
		0xbed3d7b84250f838, 0xf198e8080fd74160, 0xc9eda51d9b7ea703, 0xf709ef55439bf8f6,
		0xd20c74feebf116fc, 0x305668eb146d7546, 0x829af3ec10d89787, 0x15b8f9697b551dbc,
		0xfc823c6c8e64b8c9, 0x345585e8183b40bc, 0x674b4171d6581368, 0x1234d81cd670e9f7,
		0x0e505210d8a55e19, 0xe8258d69eeeca0dc, 0x05d4c452e8baf67e, 0xe8dbe30116a45599,
		0x1cf08ce1b1176f00, 0xccf7d0a4b81ecb49, 0x303fea136b2c430e, 0x861d6c139c06c871,
		0x5f41df72e05e0487, 0x25bd7e1e1ae26b1d, 0xbe9f4004d662a41d, 0x65bf58d483188546,
		0xd1b27cff69db13cc, 0x01a6663372c1bb36, 0x578dd7577b727f4d, 0x19c78f066c083cf6,
		0xdbe014d4f9c391bb, 0x97fbb2dd1d13ffb3, 0x31c91e0af9ef8d4f, 0x094dfc98402a43ba,
		0x069bd61bea37b752, 0x5b72d762e8d986ca, 0x72ee31865904bc85, 0xd1f5fdc5cd36c33e,
		0xba9b4980a8947cad, 0xece8f05eac49ab43, 0x65fe1184abae38e7, 0x2d7cb9dea5d31452,
		0xcc71489476e467e3, 0x4c03a258a578c68c, 0x00efdf9ecb0fd8fc, 0x9924cad471e2666d,
		0x87f8668318f765e9, 0xcb4dc57c1b55f5d8, 0xd373835a86604859, 0xe526568b5540e482,
		0x1f39040f08586fec, 0xb764f3f00293f8e6, 0x049443a2f6bd50a8, 0x76fec88697d3941a,
		0x3efb70d039bae7a2, 0xe2f4611368eca8a8, 0x7c007a96e01d2425, 0xbbcce5768e69c5bf,
		0x784fb4985c42aac3, 0xf72b5091aa223874, 0x3630333fb1e62e07, 0x8e7319ebdebbb8de,
		0x2a3982bca959fa00, 0xb2b98b9f964ba9b3, 0xf7e31014adb71951, 0xebd0fca3703acc82,
		0xec654e2a2fe6419a, 0xb326132d55a52e2c, 0x2248c57f44502978, 0x32710c2f342daf16,
		0x0517b47b5acb2bec, 0x4c7a718fca270937, 0xd69142bed0bcc541, 0xe40ebcb8ff52ce88,
		0x3e44a2dbc9f828d4, 0xc74c2f4f8f873f58, 0x3dbf648eb799e45b, 0x33f22475ee0e86f8,
		0x1eb4f9ee16d47f65, 0x40f8d2b8712744e3, 0xb886b4da3cb14572, 0x2086326fbdd6f64d,
		0xcc3de5907dd882b9, 0xa2e8b49a5ee909df, 0xdbfb8e7823964c10, 0x70dd6089ef0df8d5,
		0x30141663cdd9c99f, 0x04b805325c240365, 0x7483d80314ac12d6, 0x2b271cb91aa7f5f9,
		0x97e2245362abddf0, 0x5a84f614232a9fab, 0xf71125fcda4b7fa2, 0x1ca5a61d74b27267,
		0x38cc6a9b3adbcb45, 0xdde1bb85dc653e39, 0xe9d0c8fa64f89fd4, 0x02c5fb1ecd2b4188,
		0xf2bd137bca5756e5, 0xadefe25d121be155, 0x56cd1c3c5d893a8e, 0x4c50d337beb65bb9,
		0x918c5151675cf567, 0xaba649ffcfb56a1e, 0x20c74ab26a2247cd, 0x71166bac853c08da,
		0xb07befe2e584fc5d, 0xda45ff2a588dbf32, 0xdb98b03c4d75095e, 0x60285ae1aaa65a4c,
		0xf93b686a263140b8, 0xde469752ee1c180e, 0xcec232dc04129aae, 0xeb916baa1835ea04,
		0xd49c21c8b64388ff, 0x72a82d9658864888, 0x003348ef7eac66a8, 0x7f6f67e655b209eb,
		0x532ffb0b7a941b25, 0xd940ade6128deede, 0xdf24f2a1af89fe23, 0x95aa3b4988195ae0,
		0x3da649404f94be4a, 0x692dad132c3f7e27, 0x40aee76ecaaa9eb8, 0x1294a01e09655024,
		0x6df797abdba4e4f5, 0xea2fb6024c1d7032, 0x5f4e0492295489fc, 0x57972914ea22e06a,
		0x9a8137d133aad473, 0xa2e6dd6ae7cdf2f3, 0x9f42644f18086647, 0x16d03301c170bd3e,
		0x908c416fa546656d, 0xe081503be22e123e, 0x077cf09116c4cc72, 0xcbd25cd264b7f229,
		0x3db2f468ec594031, 0x46c00e734c9badd5, 0xd0ec0ac72075d861, 0x3037cb3cf80b7630,
		0x574c3d7b3a2721c6, 0xae99906a0076824b, 0xb175a5418b532e70, 0xd8b3e251ee231ddd,
		0xb433eec25dca1966, 0x530f30dc5cff9a93, 0x9ff03d98b53cd335, 0xafc4225076558cdf,
		0xef81d3a28284402a, 0x110bdbf51c110a28, 0x9ae1b255d027e8f6, 0x7de3e0aa24688332,
		0xe483c3ecd2067ee2, 0xf829328b276137e6, 0xa413ccad57562cad, 0xe6118e8b496acb1f,
		0x8288dca6da5ec01f, 0xa53777dc88c17255, 0x8a00f1e0d5716eda, 0x618e6f47b7a720a8,
		0x9e3907b0c692a841, 0x978b42ca963f34f3, 0x75e4b0cd98a7d7ef, 0xde4dbd6e0b5f4752,
		0x0252e4153f34493f, 0x50f0e7d803734ef9, 0x237766a38ed167ee, 0x4124414001ee39a0,
		0xd08df643e535bb21, 0x34f575b5a9a80b74, 0x2c343af87297f755, 0xcd8b6d99d821f7cb,
		0xe376fd7256fc48ae, 0xe1b06e7334352885, 0xfa87b26f86c169eb, 0x36c1604665a971de,
		0xdba147c2239c8e80, 0x6b208e69fc7f0e24, 0x8795395b6f2b60c3, 0x05dabee9194907f4,
		0xb98175142f5ed902, 0x5e1701e2021ddc81, 0x0875aba2755eed08, 0x778d83289251de95,
		0x3bfbe46a039ecb31, 0xb24704fce4cbd7f9, 0x6985ffe9a7c91e3d, 0xc8efb13df249dabb,
		0xb1037e64b0f4c9f6, 0x55f69fd197d6b7c3, 0x672589d71d68a90c, 0xbebdb8224f50a77e,
		0x3f589f80007374a7, 0xd307f4635954182a, 0xcff5850c10d4fd90, 0xc6da02dfb6408e15,
		0x93daeef1e2b1a485, 0x65d833208aeea625, 0xe2b13fa13ed3b5fa, 0x67053538130fb68e,
		0xc1042f6598218fa9, 0xee5badca749b8a2e, 0x6d22a3f947dae37d, 0xb62c6d1657f4dbaf,
		0x6e007de69704c20b, 0x1af2b913fc3841d8, 0xdc0e47348e2e8e22, 0x9b1ddef1cf958b22,
		0x632ed6b0233066b8, 0xddd02d3311bed8f2, 0xf147cfe1834656e9, 0x399aaa49d511597a,
		0x6b14886979ec0309, 0x64fc4ac36b5afb97, 0xb82f78e07f7cf081, 0x10925c9a323d0e1b,
		0xf451c79ee13c63f6, 0x7c2fc180317876c7, 0x35a12bd9eecb7d22, 0x335654a539621f90,
		0xcc32a3f35db581f0, 0xc60748a80b2369cb, 0x7c4dd3b08591156b, 0xac1ced4b6de22291,
		0xa32cfa2df134def5, 0x627108918dea2a53, 0x0555b1608fcb4ff4, 0x143ee7ac43aaa33c,
		0xdae90ce7cf4fc218, 0x4d68fc2582bcf4b5, 0x37094e1849135d71, 0xf7857e09f3d49fd8,
		0x007538c503768be7, 0xedf648ba2f6be601, 0xaa347664dd72513e, 0xbe63893c6ef23b86,
		0x130b85710605af97, 0xdd765c6b1ef6ab56, 0xf3249a629a97dc6b, 0x2a114f9020fab8e5,
		0x5a69e027cfc6ad08, 0x3c4ccb36f1a5e050, 0x2e9e7d596834f0a5, 0x2430be6858fce789,
		0xe90b862f2466e597, 0x895e2884f159a9ec, 0x26ab8fa4902fcb57, 0xa6efff5c54e1fa50,
		0x333ac4e5811a8255, 0xa58d515f02498611, 0xfe5a09dcb25c6ef4, 0x03898988ab5f5818,
		0x289ff6242af6c617, 0x3d9dd59fd381ea23, 0x52d7d93d8a8aae51, 0xc76a123d511f786f,
		0xf68901edaf00c46c, 0x8c630871b590de80, 0x05209c308991e091, 0x1f809f99b4788177,
		0x11170c2eb6c19fd8, 0x44433c779062ba58, 0xc0acb51af1874c45, 0x9f2e134284809fa1,
		0xedb523bd15c619fa, 0x02d97fd53ecc23c0, 0xacaf05a34462374c, 0xddd9c6d34bffa11f,
	}
)
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user