From 8d193ab39f6a3de878e802ec97527ef1680bc640 Mon Sep 17 00:00:00 2001
From: visualfc
Date: Sun, 30 Jun 2024 22:16:46 +0800
Subject: [PATCH] runtime: map hasher

---
 internal/runtime/alg.go                  | 239 +++++++++++++++++++++++
 internal/runtime/goarch/endian_big.go    |   7 +
 internal/runtime/goarch/endian_little.go |   9 +
 internal/runtime/hash32.go               |  61 ++++++
 internal/runtime/hash64.go               |  93 +++++++++
 internal/runtime/z_map.go                |   2 +
 internal/runtime/z_type.go               |   2 +-
 7 files changed, 412 insertions(+), 1 deletion(-)
 create mode 100644 internal/runtime/goarch/endian_big.go
 create mode 100644 internal/runtime/goarch/endian_little.go
 create mode 100644 internal/runtime/hash32.go
 create mode 100644 internal/runtime/hash64.go

diff --git a/internal/runtime/alg.go b/internal/runtime/alg.go
index a3269fe8..f2c21b2d 100644
--- a/internal/runtime/alg.go
+++ b/internal/runtime/alg.go
@@ -2,8 +2,199 @@ package runtime
 
 import (
 	"unsafe"
+
+	"github.com/goplus/llgo/internal/abi"
+	"github.com/goplus/llgo/internal/runtime/goarch"
 )
 
+const (
+	c0 = uintptr((8-goarch.PtrSize)/4*2860486313 + (goarch.PtrSize-4)/4*33054211828000289)
+	c1 = uintptr((8-goarch.PtrSize)/4*3267000013 + (goarch.PtrSize-4)/4*23344194077549503)
+)
+
+func memhash0(p unsafe.Pointer, h uintptr) uintptr {
+	return h
+}
+
+func memhash8(p unsafe.Pointer, h uintptr) uintptr {
+	return memhash(p, h, 1)
+}
+
+func memhash16(p unsafe.Pointer, h uintptr) uintptr {
+	return memhash(p, h, 2)
+}
+
+func memhash128(p unsafe.Pointer, h uintptr) uintptr {
+	return memhash(p, h, 16)
+}
+
+// //go:nosplit
+// func memhash_varlen(p unsafe.Pointer, h uintptr) uintptr {
+// 	ptr := getclosureptr()
+// 	size := *(*uintptr)(unsafe.Pointer(ptr + unsafe.Sizeof(h)))
+// 	return memhash(p, h, size)
+// }
+
+// in asm_*.s
+// func memhash(p unsafe.Pointer, h, s uintptr) uintptr
+// func memhash32(p unsafe.Pointer, h uintptr) uintptr
+// func memhash64(p unsafe.Pointer, h uintptr) uintptr
+// func strhash(p unsafe.Pointer, h uintptr) uintptr
+
+func strhash(a unsafe.Pointer, h uintptr) uintptr {
+	x := (*String)(a)
+	return memhash(x.data, h, uintptr(x.len))
+}
+
+// NOTE: Because NaN != NaN, a map can contain any
+// number of (mostly useless) entries keyed with NaNs.
+// To avoid long hash chains, we assign a random number
+// as the hash value for a NaN.
+
+func f32hash(p unsafe.Pointer, h uintptr) uintptr {
+	f := *(*float32)(p)
+	switch {
+	case f == 0:
+		return c1 * (c0 ^ h) // +0, -0
+	case f != f:
+		return c1 * (c0 ^ h ^ uintptr(fastrand())) // any kind of NaN
+	default:
+		return memhash(p, h, 4)
+	}
+}
+
+func f64hash(p unsafe.Pointer, h uintptr) uintptr {
+	f := *(*float64)(p)
+	switch {
+	case f == 0:
+		return c1 * (c0 ^ h) // +0, -0
+	case f != f:
+		return c1 * (c0 ^ h ^ uintptr(fastrand())) // any kind of NaN
+	default:
+		return memhash(p, h, 8)
+	}
+}
+
+func c64hash(p unsafe.Pointer, h uintptr) uintptr {
+	x := (*[2]float32)(p)
+	return f32hash(unsafe.Pointer(&x[1]), f32hash(unsafe.Pointer(&x[0]), h))
+}
+
+func c128hash(p unsafe.Pointer, h uintptr) uintptr {
+	x := (*[2]float64)(p)
+	return f64hash(unsafe.Pointer(&x[1]), f64hash(unsafe.Pointer(&x[0]), h))
+}
+
+func interhash(p unsafe.Pointer, h uintptr) uintptr {
+	a := (*iface)(p)
+	tab := a.tab
+	if tab == nil {
+		return h
+	}
+	t := tab._type
+	if t.Equal == nil {
+		// Check hashability here. We could do this check inside
+		// typehash, but we want to report the topmost type in
+		// the error text (e.g. in a struct with a field of slice type
+		// we want to report the struct, not the slice).
+		panic(errorString("hash of unhashable type " + t.String()))
+	}
+	if isDirectIface(t) {
+		return c1 * typehash(t, unsafe.Pointer(&a.data), h^c0)
+	} else {
+		return c1 * typehash(t, a.data, h^c0)
+	}
+}
+
+func nilinterhash(p unsafe.Pointer, h uintptr) uintptr {
+	a := (*eface)(p)
+	t := a._type
+	if t == nil {
+		return h
+	}
+	if t.Equal == nil {
+		// See comment in interhash above.
+		panic(errorString("hash of unhashable type " + t.String()))
+	}
+	if isDirectIface(t) {
+		return c1 * typehash(t, unsafe.Pointer(&a.data), h^c0)
+	} else {
+		return c1 * typehash(t, a.data, h^c0)
+	}
+}
+
+// typehash computes the hash of the object of type t at address p.
+// h is the seed.
+// This function is seldom used. Most maps use for hashing either
+// fixed functions (e.g. f32hash) or compiler-generated functions
+// (e.g. for a type like struct { x, y string }). This implementation
+// is slower but more general and is used for hashing interface types
+// (called from interhash or nilinterhash, above) or for hashing in
+// maps generated by reflect.MapOf (reflect_typehash, below).
+// Note: this function must match the compiler generated
+// functions exactly. See issue 37716.
+func typehash(t *_type, p unsafe.Pointer, h uintptr) uintptr {
+	// if t.TFlag&abi.TFlagRegularMemory != 0 {
+	// 	// Handle ptr sizes specially, see issue 37086.
+	// 	switch t.Size_ {
+	// 	case 4:
+	// 		return memhash32(p, h)
+	// 	case 8:
+	// 		return memhash64(p, h)
+	// 	default:
+	// 		return memhash(p, h, t.Size_)
+	// 	}
+	// }
+	switch t.Kind() {
+	case abi.Bool, abi.Int, abi.Int8, abi.Int16, abi.Int32, abi.Int64,
+		abi.Uint, abi.Uint8, abi.Uint16, abi.Uint32, abi.Uint64,
+		abi.Uintptr, abi.UnsafePointer:
+		switch t.Size_ {
+		case 4:
+			return memhash32(p, h)
+		case 8:
+			return memhash64(p, h)
+		default:
+			return memhash(p, h, t.Size_)
+		}
+	case abi.Float32:
+		return f32hash(p, h)
+	case abi.Float64:
+		return f64hash(p, h)
+	case abi.Complex64:
+		return c64hash(p, h)
+	case abi.Complex128:
+		return c128hash(p, h)
+	case abi.String:
+		return strhash(p, h)
+	case abi.Interface:
+		i := (*interfacetype)(unsafe.Pointer(t))
+		if len(i.Methods) == 0 {
+			return nilinterhash(p, h)
+		}
+		return interhash(p, h)
+	case abi.Array:
+		a := (*arraytype)(unsafe.Pointer(t))
+		for i := uintptr(0); i < a.Len; i++ {
+			h = typehash(a.Elem, add(p, i*a.Elem.Size_), h)
+		}
+		return h
+	case abi.Struct:
+		s := (*structtype)(unsafe.Pointer(t))
+		for _, f := range s.Fields {
+			if f.Name_ == "_" {
+				continue
+			}
+			h = typehash(f.Typ, add(p, f.Offset), h)
+		}
+		return h
+	default:
+		// Should never happen, as typehash should only be called
+		// with comparable types.
+		panic(errorString("hash of unhashable type " + t.String()))
+	}
+}
+
 func ptrequal(p, q unsafe.Pointer) bool {
 	return p == q
 }
@@ -81,3 +272,51 @@ func ifaceeq(tab *itab, x, y unsafe.Pointer) bool {
 	}
 	return eq(x, y)
 }
+
+// Testing adapters for hash quality tests (see hash_test.go)
+func stringHash(s string, seed uintptr) uintptr {
+	return strhash(noescape(unsafe.Pointer(&s)), seed)
+}
+
+func bytesHash(b []byte, seed uintptr) uintptr {
+	s := (*slice)(unsafe.Pointer(&b))
+	return memhash(s.array, seed, uintptr(s.len))
+}
+
+func int32Hash(i uint32, seed uintptr) uintptr {
+	return memhash32(noescape(unsafe.Pointer(&i)), seed)
+}
+
+func int64Hash(i uint64, seed uintptr) uintptr {
+	return memhash64(noescape(unsafe.Pointer(&i)), seed)
+}
+
+func efaceHash(i any, seed uintptr) uintptr {
+	return nilinterhash(noescape(unsafe.Pointer(&i)), seed)
+}
+
+func ifaceHash(i interface {
+	F()
+}, seed uintptr) uintptr {
+	return interhash(noescape(unsafe.Pointer(&i)), seed)
+}
+
+var hashkey [4]uintptr
+
+// Note: These routines perform the read with a native endianness.
+func readUnaligned32(p unsafe.Pointer) uint32 {
+	q := (*[4]byte)(p)
+	if goarch.BigEndian {
+		return uint32(q[3]) | uint32(q[2])<<8 | uint32(q[1])<<16 | uint32(q[0])<<24
+	}
+	return uint32(q[0]) | uint32(q[1])<<8 | uint32(q[2])<<16 | uint32(q[3])<<24
+}
+
+func readUnaligned64(p unsafe.Pointer) uint64 {
+	q := (*[8]byte)(p)
+	if goarch.BigEndian {
+		return uint64(q[7]) | uint64(q[6])<<8 | uint64(q[5])<<16 | uint64(q[4])<<24 |
+			uint64(q[3])<<32 | uint64(q[2])<<40 | uint64(q[1])<<48 | uint64(q[0])<<56
+	}
+	return uint64(q[0]) | uint64(q[1])<<8 | uint64(q[2])<<16 | uint64(q[3])<<24 | uint64(q[4])<<32 | uint64(q[5])<<40 | uint64(q[6])<<48 | uint64(q[7])<<56
+}
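The NaN comment and the unhashable-type panic in interhash/typehash above are the two user-visible consequences of this hasher design, and they match what regular Go maps already do. A minimal sketch of both behaviors against ordinary Go maps (illustrative only, not part of the patch):

package main

import (
	"fmt"
	"math"
)

func main() {
	// Unhashable dynamic types: interhash/typehash panic exactly like the
	// built-in map implementation does for a slice-valued key.
	m := map[any]int{"ok": 1}
	func() {
		defer func() { fmt.Println("recovered:", recover()) }() // hash of unhashable type []int
		m[[]int{1, 2, 3}] = 2
	}()

	// NaN != NaN: every insert with a NaN key adds a fresh entry and no
	// lookup can find it again, which is why f64hash mixes in fastrand().
	f := map[float64]int{}
	for i := 0; i < 3; i++ {
		f[math.NaN()] = i
	}
	_, ok := f[math.NaN()]
	fmt.Println(len(f), ok) // 3 false
}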
diff --git a/internal/runtime/goarch/endian_big.go b/internal/runtime/goarch/endian_big.go
new file mode 100644
index 00000000..244b2abf
--- /dev/null
+++ b/internal/runtime/goarch/endian_big.go
@@ -0,0 +1,7 @@
+//go:build ppc64 || s390x || mips || mips64
+// +build ppc64 s390x mips mips64
+
+package goarch
+
+const BigEndian = true
+const LittleEndian = false
diff --git a/internal/runtime/goarch/endian_little.go b/internal/runtime/goarch/endian_little.go
new file mode 100644
index 00000000..9ce6b871
--- /dev/null
+++ b/internal/runtime/goarch/endian_little.go
@@ -0,0 +1,9 @@
+//go:build 386 || amd64 || arm || arm64 || ppc64le || mips64le || mipsle || riscv64 || wasm
+// +build 386 amd64 arm arm64 ppc64le mips64le mipsle riscv64 wasm
+
+package goarch
+
+const (
+	BigEndian    = false
+	LittleEndian = true
+)
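goarch.BigEndian only controls how readUnaligned32/64 in alg.go assemble bytes, so that the result always matches a native-order unaligned load of the same memory. A rough equivalence check (not part of the patch; it assumes Go 1.21+ for binary.NativeEndian, which the runtime itself cannot import):

package main

import (
	"encoding/binary"
	"fmt"
	"unsafe"
)

// readUnaligned32 is the standard-library spelling of the runtime helper:
// read four bytes at p in the machine's native byte order.
func readUnaligned32(p unsafe.Pointer) uint32 {
	q := (*[4]byte)(p)
	return binary.NativeEndian.Uint32(q[:])
}

func main() {
	b := []byte{0x01, 0x02, 0x03, 0x04, 0x05}
	// Start at offset 1 to show that the read does not require alignment.
	fmt.Printf("%#x\n", readUnaligned32(unsafe.Pointer(&b[1])))
}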
diff --git a/internal/runtime/hash32.go b/internal/runtime/hash32.go
new file mode 100644
index 00000000..818118f7
--- /dev/null
+++ b/internal/runtime/hash32.go
@@ -0,0 +1,61 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Hashing algorithm inspired by
+// wyhash: https://github.com/wangyi-fudan/wyhash/blob/ceb019b530e2c1c14d70b79bfa2bc49de7d95bc1/Modern%20Non-Cryptographic%20Hash%20Function%20and%20Pseudorandom%20Number%20Generator.pdf
+
+//go:build 386 || arm || mips || mipsle
+
+package runtime
+
+import "unsafe"
+
+func memhash32(p unsafe.Pointer, seed uintptr) uintptr {
+	a, b := mix32(uint32(seed), uint32(4^hashkey[0]))
+	t := readUnaligned32(p)
+	a ^= t
+	b ^= t
+	a, b = mix32(a, b)
+	a, b = mix32(a, b)
+	return uintptr(a ^ b)
+}
+
+func memhash64(p unsafe.Pointer, seed uintptr) uintptr {
+	a, b := mix32(uint32(seed), uint32(8^hashkey[0]))
+	a ^= readUnaligned32(p)
+	b ^= readUnaligned32(add(p, 4))
+	a, b = mix32(a, b)
+	a, b = mix32(a, b)
+	return uintptr(a ^ b)
+}
+
+func memhash(p unsafe.Pointer, seed, s uintptr) uintptr {
+	a, b := mix32(uint32(seed), uint32(s^hashkey[0]))
+	if s == 0 {
+		return uintptr(a ^ b)
+	}
+	for ; s > 8; s -= 8 {
+		a ^= readUnaligned32(p)
+		b ^= readUnaligned32(add(p, 4))
+		a, b = mix32(a, b)
+		p = add(p, 8)
+	}
+	if s >= 4 {
+		a ^= readUnaligned32(p)
+		b ^= readUnaligned32(add(p, s-4))
+	} else {
+		t := uint32(*(*byte)(p))
+		t |= uint32(*(*byte)(add(p, s>>1))) << 8
+		t |= uint32(*(*byte)(add(p, s-1))) << 16
+		b ^= t
+	}
+	a, b = mix32(a, b)
+	a, b = mix32(a, b)
+	return uintptr(a ^ b)
+}
+
+func mix32(a, b uint32) (uint32, uint32) {
+	c := uint64(a^uint32(hashkey[1])) * uint64(b^uint32(hashkey[2]))
+	return uint32(c), uint32(c >> 32)
+}
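On 32-bit targets everything funnels through mix32: one 32x32->64 multiply whose low and high halves feed the next round. A standalone sketch of the memhash32 path (not part of the patch; the k1/k2 constants stand in for the per-process random hashkey[1]/hashkey[2], and hashkey[0] is taken as zero):

package main

import "fmt"

const (
	k1 = 0x9e3779b9 // stand-in for hashkey[1]
	k2 = 0x85ebca6b // stand-in for hashkey[2]
)

// mix32 mirrors the patch: multiply the two keyed halves and split the
// 64-bit product back into two 32-bit words.
func mix32(a, b uint32) (uint32, uint32) {
	c := uint64(a^k1) * uint64(b^k2)
	return uint32(c), uint32(c >> 32)
}

// hash4 follows memhash32 above: seed the state with (seed, length),
// fold in the input word, then mix twice.
func hash4(x, seed uint32) uint32 {
	a, b := mix32(seed, 4)
	a ^= x
	b ^= x
	a, b = mix32(a, b)
	a, b = mix32(a, b)
	return a ^ b
}

func main() {
	fmt.Printf("%#08x\n", hash4(1, 7))
	fmt.Printf("%#08x\n", hash4(2, 7)) // one changed input bit; compare the outputs
}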
diff --git a/internal/runtime/hash64.go b/internal/runtime/hash64.go
new file mode 100644
index 00000000..31d2a67a
--- /dev/null
+++ b/internal/runtime/hash64.go
@@ -0,0 +1,93 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Hashing algorithm inspired by
+// wyhash: https://github.com/wangyi-fudan/wyhash
+
+//go:build amd64 || arm64 || loong64 || mips64 || mips64le || ppc64 || ppc64le || riscv64 || s390x || wasm
+
+package runtime
+
+import (
+	"unsafe"
+
+	"github.com/goplus/llgo/internal/runtime/math"
+)
+
+const (
+	m1 = 0xa0761d6478bd642f
+	m2 = 0xe7037ed1a0b428db
+	m3 = 0x8ebc6af09c88c6e3
+	m4 = 0x589965cc75374cc3
+	m5 = 0x1d8e4e27c47d124f
+)
+
+func memhash(p unsafe.Pointer, seed, s uintptr) uintptr {
+	var a, b uintptr
+	seed ^= hashkey[0] ^ m1
+	switch {
+	case s == 0:
+		return seed
+	case s < 4:
+		a = uintptr(*(*byte)(p))
+		a |= uintptr(*(*byte)(add(p, s>>1))) << 8
+		a |= uintptr(*(*byte)(add(p, s-1))) << 16
+	case s == 4:
+		a = r4(p)
+		b = a
+	case s < 8:
+		a = r4(p)
+		b = r4(add(p, s-4))
+	case s == 8:
+		a = r8(p)
+		b = a
+	case s <= 16:
+		a = r8(p)
+		b = r8(add(p, s-8))
+	default:
+		l := s
+		if l > 48 {
+			seed1 := seed
+			seed2 := seed
+			for ; l > 48; l -= 48 {
+				seed = mix(r8(p)^m2, r8(add(p, 8))^seed)
+				seed1 = mix(r8(add(p, 16))^m3, r8(add(p, 24))^seed1)
+				seed2 = mix(r8(add(p, 32))^m4, r8(add(p, 40))^seed2)
+				p = add(p, 48)
+			}
+			seed ^= seed1 ^ seed2
+		}
+		for ; l > 16; l -= 16 {
+			seed = mix(r8(p)^m2, r8(add(p, 8))^seed)
+			p = add(p, 16)
+		}
+		a = r8(add(p, l-16))
+		b = r8(add(p, l-8))
+	}
+
+	return mix(m5^s, mix(a^m2, b^seed))
+}
+
+func memhash32(p unsafe.Pointer, seed uintptr) uintptr {
+	a := r4(p)
+	return mix(m5^4, mix(a^m2, a^seed^hashkey[0]^m1))
+}
+
+func memhash64(p unsafe.Pointer, seed uintptr) uintptr {
+	a := r8(p)
+	return mix(m5^8, mix(a^m2, a^seed^hashkey[0]^m1))
+}
+
+func mix(a, b uintptr) uintptr {
+	hi, lo := math.Mul64(uint64(a), uint64(b))
+	return uintptr(hi ^ lo)
+}
+
+func r4(p unsafe.Pointer) uintptr {
+	return uintptr(readUnaligned32(p))
+}
+
+func r8(p unsafe.Pointer) uintptr {
+	return uintptr(readUnaligned64(p))
+}
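On 64-bit targets the whole scheme reduces to mix: a 64x64->128 multiply folded with XOR. The patch calls the runtime's internal math.Mul64; outside the runtime the same operation is math/bits.Mul64, which the sketch below uses (not part of the patch; hashkey[0] is again taken as zero for the demo):

package main

import (
	"fmt"
	"math/bits"
)

const (
	m1 = 0xa0761d6478bd642f
	m2 = 0xe7037ed1a0b428db
	m5 = 0x1d8e4e27c47d124f
)

// mix mirrors the patch: full 128-bit product, high half XOR low half.
func mix(a, b uint64) uint64 {
	hi, lo := bits.Mul64(a, b)
	return hi ^ lo
}

// hash8 follows memhash64 above for a single 8-byte value.
func hash8(x, seed uint64) uint64 {
	return mix(m5^8, mix(x^m2, x^seed^m1))
}

func main() {
	fmt.Printf("%#016x\n", hash8(42, 0))
	fmt.Printf("%#016x\n", hash8(43, 0))
}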
diff --git a/internal/runtime/z_map.go b/internal/runtime/z_map.go
index 363de015..2cb8e9ee 100644
--- a/internal/runtime/z_map.go
+++ b/internal/runtime/z_map.go
@@ -25,6 +25,8 @@ import (
 // Map represents a Go map.
 type Map = hmap
 type maptype = abi.MapType
+type arraytype = abi.ArrayType
+type structtype = abi.StructType
 
 type slice struct {
 	array unsafe.Pointer
diff --git a/internal/runtime/z_type.go b/internal/runtime/z_type.go
index 2b94da37..3caacb2a 100644
--- a/internal/runtime/z_type.go
+++ b/internal/runtime/z_type.go
@@ -230,7 +230,7 @@ func MapOf(key, elem *Type, bucket *Type, flags int) *Type {
 		Flags:      uint32(flags),
 	}
 	ret.Hasher = func(p unsafe.Pointer, seed uintptr) uintptr {
-		return uintptr(p)
+		return typehash(key, p, seed)
 	}
 	return &ret.Type
 }
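The z_type.go change wires the new hasher into MapOf: the Hasher used to return uintptr(p), i.e. the address of the key, so equal keys stored at different addresses hashed differently; returning typehash(key, p, seed) makes the hash a function of the key's value. A toy sketch of how a map implementation consumes such a hasher (not part of the patch; mapType, bucketFor and the string hasher below are made-up stand-ins):

package main

import (
	"fmt"
	"unsafe"
)

// mapType is a stand-in for the runtime's map type descriptor; only the
// Hasher field matters for this sketch.
type mapType struct {
	Hasher func(p unsafe.Pointer, seed uintptr) uintptr
}

// bucketFor shows how a map turns the hasher's result into a bucket index.
func bucketFor(t *mapType, key unsafe.Pointer, seed, nbuckets uintptr) uintptr {
	return t.Hasher(key, seed) % nbuckets
}

func main() {
	// A value-based hasher over string keys (the real one is typehash(key, p, seed)).
	t := &mapType{Hasher: func(p unsafe.Pointer, seed uintptr) uintptr {
		h := seed
		for _, c := range *(*string)(p) {
			h = h*31 + uintptr(c)
		}
		return h
	}}

	k1, k2 := "hello", "hello" // equal values held in two different variables
	fmt.Println(bucketFor(t, unsafe.Pointer(&k1), 1, 8) ==
		bucketFor(t, unsafe.Pointer(&k2), 1, 8)) // true: the hash depends on the value,
	// not on the key's address the way `return uintptr(p)` did
}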