Initial commit: Go 1.23 release state
This commit is contained in:
206
src/internal/bytealg/index_arm64.s
Normal file
206
src/internal/bytealg/index_arm64.s
Normal file
@@ -0,0 +1,206 @@
|
||||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include "go_asm.h"
|
||||
#include "textflag.h"
|
||||
|
||||
// ·Index is the entry point whose frame holds a at offset 0, b at offset 24
// and the result at offset 48 (56 bytes of arguments in total).
// It only marshals those values into the registers read by indexbody<>
// and then branches there; the result is written through R9 by the callee.
TEXT ·Index(SB),NOSPLIT,$0-56
	MOVD	$ret+48(FP), R9		// R9 = address of the result slot
	MOVD	b_len+32(FP), R3	// R3 = len(b)
	MOVD	b_base+24(FP), R2	// R2 = &b[0]
	MOVD	a_len+8(FP), R1		// R1 = len(a)
	MOVD	a_base+0(FP), R0	// R0 = &a[0]
	B	indexbody<>(SB)		// tail branch: indexbody<> executes the RET
|
||||
|
||||
// ·IndexString is the entry point whose frame holds a at offset 0, b at
// offset 16 and the result at offset 32 (40 bytes of arguments in total).
// It only marshals those values into the registers read by indexbody<>
// and then branches there; the result is written through R9 by the callee.
TEXT ·IndexString(SB),NOSPLIT,$0-40
	MOVD	$ret+32(FP), R9		// R9 = address of the result slot
	MOVD	b_len+24(FP), R3	// R3 = len(b)
	MOVD	b_base+16(FP), R2	// R2 = pointer to b's bytes
	MOVD	a_len+8(FP), R1		// R1 = len(a)
	MOVD	a_base+0(FP), R0	// R0 = pointer to a's bytes
	B	indexbody<>(SB)		// tail branch: indexbody<> executes the RET
|
||||
|
||||
// indexbody<> searches the haystack for the needle, trying every start
// position from the lowest address upward.
//
// Register contract on entry:
//   R0: pointer to the haystack
//   R1: haystack length in bytes
//   R2: pointer to the needle
//   R3: needle length in bytes (2 <= len <= 32)
//   R9: address where the result is stored
//
// On return the word at (R9) holds the byte offset of the first position
// at which the needle matches, or -1 if no position matches.
TEXT indexbody<>(SB),NOSPLIT,$0-56
	// Strategy: load the needle into registers once, so that it is never
	// re-read from memory, then slide a window over the haystack one byte
	// at a time and compare register against register.
	//
	// R8 = haystack base + 1. Every scan loop post-increments R0 by one
	//      when it loads a candidate, so on a match R0 - R8 is the offset.
	// R4 = haystack base + (haystack length - needle length), i.e. the
	//      last address at which a candidate may start.
	ADD	$1, R0, R8
	SUB	R3, R1, R4
	ADD	R0, R4, R4

	// Dispatch on the needle length. Lengths above 8 are handled at
	// needle_gt8; for 2..8 the individual bits of R3 select the case,
	// and bit 3 being set means the length is exactly 8.
	CMP	$8, R3
	BHI	needle_gt8
	TBZ	$3, R3, needle_2to7

needle_8:
	MOVD	(R2), R5		// R5 = all 8 needle bytes
scan_8:
	CMP	R4, R0
	BHI	no_match		// R0 is past the last candidate start
	MOVD.P	1(R0), R6		// R6 = 8 haystack bytes at R0, then R0++
	CMP	R5, R6
	BNE	scan_8
	B	match

needle_2to7:
	TBZ	$2, R3, needle_2or3	// bit 2 clear: length is 2 or 3
	TBZ	$1, R3, needle_4or5	// bit 1 clear: length is 4 or 5
	TBZ	$0, R3, needle_6	// bit 0 clear: length is 6
	// otherwise the length is 7

needle_7:
	// Two 4-byte words that overlap by one byte cover the 7 needle bytes:
	// R5 = bytes 0-3, R6 = bytes 3-6.
	MOVWU	3(R2), R6
	MOVWU	(R2), R5
scan_7:
	CMP	R4, R0
	BHI	no_match
	MOVWU.P	1(R0), R3		// R3 is free to use as scratch from here on
	CMP	R5, R3
	BNE	scan_7
	MOVWU	2(R0), R3		// R0 was already advanced: candidate bytes 3-6
	CMP	R6, R3
	BNE	scan_7
	B	match

needle_6:
	MOVWU	(R2), R5		// R5 = needle bytes 0-3
	MOVHU	4(R2), R6		// R6 = needle bytes 4-5
scan_6:
	CMP	R4, R0
	BHI	no_match
	MOVWU.P	1(R0), R3
	CMP	R5, R3
	BNE	scan_6
	MOVHU	3(R0), R3		// candidate bytes 4-5 (R0 already advanced)
	CMP	R6, R3
	BNE	scan_6
	B	match

needle_4or5:
	TBZ	$0, R3, needle_4	// bit 0 clear: length is 4

needle_5:
	MOVWU	(R2), R5		// R5 = needle bytes 0-3
	MOVBU	4(R2), R7		// R7 = needle byte 4
scan_5:
	CMP	R4, R0
	BHI	no_match
	MOVWU.P	1(R0), R3
	CMP	R5, R3
	BNE	scan_5
	MOVBU	3(R0), R3		// candidate byte 4 (R0 already advanced)
	CMP	R7, R3
	BNE	scan_5
	B	match

needle_4:
	MOVWU	(R2), R5		// R5 = all 4 needle bytes
scan_4:
	CMP	R4, R0
	BHI	no_match
	MOVWU.P	1(R0), R6
	CMP	R5, R6
	BNE	scan_4
	B	match

needle_2or3:
	TBZ	$0, R3, needle_2	// bit 0 clear: length is 2

needle_3:
	MOVHU	(R2), R6		// R6 = needle bytes 0-1
	MOVBU	2(R2), R7		// R7 = needle byte 2
scan_3:
	CMP	R4, R0
	BHI	no_match
	MOVHU.P	1(R0), R3
	CMP	R6, R3
	BNE	scan_3
	MOVBU	1(R0), R3		// candidate byte 2 (R0 already advanced)
	CMP	R7, R3
	BNE	scan_3
	B	match

needle_2:
	MOVHU	(R2), R5		// R5 = both needle bytes
scan_2:
	CMP	R4, R0
	BHI	no_match
	MOVHU.P	1(R0), R6
	CMP	R5, R6
	BNE	scan_2
	// equal: fall through into match

match:
	SUB	R8, R0, R0		// R0 = (candidate start + 1) - (base + 1)
	MOVD	R0, (R9)
	RET

no_match:
	MOVD	$-1, R0
	MOVD	R0, (R9)
	RET

needle_gt8:
	// R11 = len(needle) - 9. Added to the already post-incremented R0 it
	// addresses the last 8 bytes of the current candidate.
	SUB	$9, R3, R11
	CMP	$16, R3
	BHI	needle_gt16

needle_9to16:
	MOVD.P	8(R2), R5		// R5 = first 8 needle bytes; R2 += 8
	SUB	$16, R3, R7		// R7 = len(needle) - 16, offset from the advanced R2
	MOVD	(R2)(R7), R6		// R6 = last 8 needle bytes (may overlap R5)
scan_9to16:
	// Compare the leading 8 bytes first, the trailing 8 only if they match.
	CMP	R4, R0
	BHI	no_match
	MOVD.P	1(R0), R7
	CMP	R5, R7
	BNE	scan_9to16
	MOVD	(R0)(R11), R7
	CMP	R6, R7			// trailing 8 bytes
	BNE	scan_9to16
	B	match

needle_gt16:
	CMP	$24, R3
	BHI	needle_25to32

needle_17to24:
	LDP.P	16(R2), (R5, R6)	// R5, R6 = first 16 needle bytes; R2 += 16
	SUB	$24, R3, R10		// R10 = len(needle) - 24
	MOVD	(R2)(R10), R7		// R7 = last 8 needle bytes
scan_17to24:
	// Compare the leading 16 bytes first, then the trailing 8.
	CMP	R4, R0
	BHI	no_match
	MOVD.P	1(R0), R10
	CMP	R5, R10
	BNE	scan_17to24
	MOVD	7(R0), R10		// candidate bytes 8-15 (R0 already advanced)
	CMP	R6, R10
	BNE	scan_17to24
	MOVD	(R0)(R11), R10
	CMP	R7, R10			// trailing 8 bytes
	BNE	scan_17to24
	B	match

needle_25to32:
	LDP.P	16(R2), (R5, R6)
	MOVD.P	8(R2), R7		// R5, R6, R7 = first 24 needle bytes; R2 += 24 in total
	SUB	$32, R3, R12		// R12 = len(needle) - 32
	MOVD	(R2)(R12), R10		// R10 = last 8 needle bytes
scan_25to32:
	// Compare the leading 24 bytes first, then the trailing 8.
	CMP	R4, R0
	BHI	no_match
	MOVD.P	1(R0), R12
	CMP	R5, R12
	BNE	scan_25to32
	MOVD	7(R0), R12		// candidate bytes 8-15 (R0 already advanced)
	CMP	R6, R12
	BNE	scan_25to32
	MOVD	15(R0), R12		// candidate bytes 16-23
	CMP	R7, R12
	BNE	scan_25to32
	MOVD	(R0)(R11), R12
	CMP	R10, R12		// trailing 8 bytes
	BNE	scan_25to32
	B	match
|
||||
Reference in New Issue
Block a user