112 lines
4.3 KiB
Diff
112 lines
4.3 KiB
Diff
From 34f04fb1e5a0c305463016d99ca47b2ed52ff472 Mon Sep 17 00:00:00 2001
|
|
From: luofeng 00425382 <luofeng13@huawei.com>
|
|
Date: Sat, 9 Sep 2023 16:55:00 +0800
|
|
Subject: [PATCH] support clang build
|
|
|
|
---
|
|
src/util/simd_arm.h | 26 ++++++++++++++++----------
|
|
1 file changed, 16 insertions(+), 10 deletions(-)
|
|
|
|
diff --git a/src/util/simd_arm.h b/src/util/simd_arm.h
|
|
index cce119f..feaf504 100644
|
|
--- a/src/util/simd_arm.h
|
|
+++ b/src/util/simd_arm.h
|
|
@@ -82,6 +82,12 @@ vmvnq_u64(uint64x2_t a) {
|
|
#pragma GCC diagnostic push
|
|
#pragma GCC diagnostic ignored "-Wshadow"
|
|
|
|
+#define VSHLQ_N_S64(a0, a1) vshlq_n_s64(a0, a1)
|
|
+#define VSHRQ_N_U64(a0, a1) vshrq_n_u64(a0, a1)
|
|
+#define VEXTQ_S8(a0, a1, a2) vextq_s8(a0, a1, a2)
|
|
+#define VGETQ_LANE_S32(a0, a1) vgetq_lane_s32(a0, a1)
|
|
+#define VGETQ_LANE_S64(a0, a1) vgetq_lane_s64(a0, a1)
|
|
+
|
|
static really_inline m128 ones128(void) {
|
|
m128 result;
|
|
result.vect_s32 = vdupq_n_s32(0xFFFFFFFF);
|
|
@@ -130,7 +136,7 @@ static really_inline u32 diffrich64_128(m128 a, m128 b) {
|
|
static really_really_inline m128 lshift64_m128(m128 a, unsigned b) {
|
|
assert(b <= 63);
|
|
m128 result;
|
|
- result.vect_s64 = vshlq_n_s64(a.vect_s64, b);
|
|
+ result.vect_s64 = VSHLQ_N_S64(a.vect_s64, b);
|
|
return result;
|
|
}
|
|
|
|
@@ -140,7 +146,7 @@ static really_really_inline m128 rshift64_m128(m128 a, int imm8) {
|
|
return a;
|
|
}
|
|
m128 result;
|
|
- result.vect_u64 = vshrq_n_u64(a.vect_u64, imm8);
|
|
+ result.vect_u64 = VSHRQ_N_U64(a.vect_u64, imm8);
|
|
return result;
|
|
}
|
|
|
|
@@ -163,7 +169,7 @@ static really_really_inline u32 movemask128(m128 a) {
|
|
static really_really_inline m128 rshiftbyte_m128(m128 a, int imm8) {
|
|
assert(imm8 >= 0 && imm8 <= 15);
|
|
m128 result;
|
|
- result.vect_s8 = vextq_s8(a.vect_s8, vdupq_n_s8(0), imm8);
|
|
+ result.vect_s8 = VEXTQ_S8(a.vect_s8, vdupq_n_s8(0), imm8);
|
|
return result;
|
|
}
|
|
|
|
@@ -173,7 +179,7 @@ static really_really_inline m128 lshiftbyte_m128(m128 a, int imm8) {
|
|
if (unlikely(imm8 == 0)) {
|
|
return a;
|
|
}
|
|
- result.vect_s8 = vextq_s8(vdupq_n_s8(0), a.vect_s8, (16 - imm8));
|
|
+ result.vect_s8 = VEXTQ_S8(vdupq_n_s8(0), a.vect_s8, (16 - imm8));
|
|
return result;
|
|
}
|
|
|
|
@@ -219,13 +225,13 @@ static really_inline m128 load_m128_from_u64a(const u64a *p) {
|
|
/*The x86 platform does not perform the lower 2 bit operation.
|
|
If the value of imm exceeds 2 bit, a compilation error occurs.*/
|
|
static really_inline u32 extract32from128(m128 a, int imm) {
|
|
- return vgetq_lane_s32(a.vect_s32, imm & 0x0003);
|
|
+ return VGETQ_LANE_S32(a.vect_s32, imm & 0x0003);
|
|
}
|
|
|
|
/*The x86 platform does not perform the lower 1 bit operation.
|
|
If the value of imm exceeds 1 bit, a compilation error occurs.*/
|
|
static really_inline u64a extract64from128(m128 a, int imm) {
|
|
- return vgetq_lane_s64(a.vect_s64, imm & 0x0001);
|
|
+ return VGETQ_LANE_S64(a.vect_s64, imm & 0x0001);
|
|
}
|
|
|
|
#define extractlow64from256(a) movq(a.lo)
|
|
@@ -234,14 +240,14 @@ static really_inline u64a extract64from128(m128 a, int imm) {
|
|
/*The x86 platform does not perform the lower 2 bit operation.
|
|
If the value of imm exceeds 2 bit, a compilation error occurs.*/
|
|
static really_inline u32 extract32from256(m256 a, int imm) {
|
|
- return vgetq_lane_s32((imm >> 2) ? a.hi.vect_s32 : a.lo.vect_s32,
|
|
+ return VGETQ_LANE_S32((imm >> 2) ? a.hi.vect_s32 : a.lo.vect_s32,
|
|
imm & 0x0003);
|
|
}
|
|
|
|
/*The x86 platform does not perform the lower 1 bit operation.
|
|
If the value of imm exceeds 1 bit, a compilation error occurs.*/
|
|
static really_inline u64a extract64from256(m256 a, int imm) {
|
|
- return vgetq_lane_s64((imm >> 1) ? a.hi.vect_s64 : a.lo.vect_s64,
|
|
+ return VGETQ_LANE_S64((imm >> 1) ? a.hi.vect_s64 : a.lo.vect_s64,
|
|
imm & 0x0001);
|
|
}
|
|
|
|
@@ -355,9 +361,9 @@ static really_inline m128 palignr(m128 a, m128 b, int count) {
|
|
m128 result;
|
|
count = count & 0xff;
|
|
if (likely(count < 16)) {
|
|
- result.vect_s8 = vextq_s8(b.vect_s8, a.vect_s8, count);
|
|
+ result.vect_s8 = VEXTQ_S8(b.vect_s8, a.vect_s8, count);
|
|
} else if (count < 32) {
|
|
- result.vect_s8 = vextq_s8(a.vect_s8, vdupq_n_s8(0x0), count - 16);
|
|
+ result.vect_s8 = VEXTQ_S8(a.vect_s8, vdupq_n_s8(0x0), count - 16);
|
|
} else {
|
|
result.vect_s32 = vdupq_n_s32(0);
|
|
}
|
|
--
|
|
2.28.0.windows.1
|
|
|
|
|