mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 04:02:41 +01:00
[X86] Update SSE/AVX integer MINMAX intrinsics to emit llvm.smax.* etc. (PR46851)
We're now getting close to having the necessary analysis/combines etc. for the new generic llvm smax/smin/umax/umin intrinsics. This patch updates the SSE/AVX integer MINMAX intrinsics to emit the generic equivalents instead of the icmp+select code pattern. Differential Revision: https://reviews.llvm.org/D87603
This commit is contained in:
parent
df6a87cef9
commit
5f9b5ed281
@ -1380,19 +1380,6 @@ static Value *upgradeAbs(IRBuilder<> &Builder, CallInst &CI) {
|
||||
return Res;
|
||||
}
|
||||
|
||||
// Lowers a legacy X86 integer min/max intrinsic call to an icmp + select pair.
//
// Builder: IR builder used to emit the replacement instructions.
// CI:      the intrinsic call being upgraded; argument operands 0 and 1 are
//          the two values being compared.
// Pred:    integer comparison predicate. The call sites visible in this file
//          pass ICMP_SGT/ICMP_UGT for the pmaxs/pmaxu names and
//          ICMP_SLT/ICMP_ULT for the pmins/pminu names.
//
// Returns the select result, additionally routed through EmitX86Select when
// the call carries four argument operands.
static Value *upgradeIntMinMax(IRBuilder<> &Builder, CallInst &CI,
|
||||
ICmpInst::Predicate Pred) {
|
||||
Value *Op0 = CI.getArgOperand(0);
|
||||
Value *Op1 = CI.getArgOperand(1);
|
||||
// Choose Op0 where `Op0 <Pred> Op1` holds, otherwise Op1.
Value *Cmp = Builder.CreateICmp(Pred, Op0, Op1);
|
||||
Value *Res = Builder.CreateSelect(Cmp, Op0, Op1);
|
||||
|
||||
// Four-operand form (reached via the avx512.mask.* name prefixes at the call
// sites). NOTE(review): presumably operand 3 is the lane mask and operand 2
// the pass-through value -- confirm against EmitX86Select.
if (CI.getNumArgOperands() == 4)
|
||||
Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
|
||||
|
||||
return Res;
|
||||
}
|
||||
|
||||
static Value *upgradePMULDQ(IRBuilder<> &Builder, CallInst &CI, bool IsSigned) {
|
||||
Type *Ty = CI.getType();
|
||||
|
||||
@ -2136,25 +2123,25 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
|
||||
Name == "sse41.pmaxsd" ||
|
||||
Name.startswith("avx2.pmaxs") ||
|
||||
Name.startswith("avx512.mask.pmaxs"))) {
|
||||
Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SGT);
|
||||
Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smax);
|
||||
} else if (IsX86 && (Name == "sse2.pmaxu.b" ||
|
||||
Name == "sse41.pmaxuw" ||
|
||||
Name == "sse41.pmaxud" ||
|
||||
Name.startswith("avx2.pmaxu") ||
|
||||
Name.startswith("avx512.mask.pmaxu"))) {
|
||||
Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_UGT);
|
||||
Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umax);
|
||||
} else if (IsX86 && (Name == "sse41.pminsb" ||
|
||||
Name == "sse2.pmins.w" ||
|
||||
Name == "sse41.pminsd" ||
|
||||
Name.startswith("avx2.pmins") ||
|
||||
Name.startswith("avx512.mask.pmins"))) {
|
||||
Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SLT);
|
||||
Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smin);
|
||||
} else if (IsX86 && (Name == "sse2.pminu.b" ||
|
||||
Name == "sse41.pminuw" ||
|
||||
Name == "sse41.pminud" ||
|
||||
Name.startswith("avx2.pminu") ||
|
||||
Name.startswith("avx512.mask.pminu"))) {
|
||||
Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_ULT);
|
||||
Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umin);
|
||||
} else if (IsX86 && (Name == "sse2.pmulu.dq" ||
|
||||
Name == "avx2.pmulu.dq" ||
|
||||
Name == "avx512.pmulu.dq.512" ||
|
||||
|
@ -1632,11 +1632,11 @@ define <4 x i64> @test_mm256_max_epi8(<4 x i64> %a0, <4 x i64> %a1) {
|
||||
; CHECK-NEXT: ret{{[l|q]}}
|
||||
%arg0 = bitcast <4 x i64> %a0 to <32 x i8>
|
||||
%arg1 = bitcast <4 x i64> %a1 to <32 x i8>
|
||||
%cmp = icmp sgt <32 x i8> %arg0, %arg1
|
||||
%sel = select <32 x i1> %cmp, <32 x i8> %arg0, <32 x i8> %arg1
|
||||
%sel = call <32 x i8> @llvm.smax.v32i8(<32 x i8> %arg0, <32 x i8> %arg1)
|
||||
%bc = bitcast <32 x i8> %sel to <4 x i64>
|
||||
ret <4 x i64> %bc
|
||||
}
|
||||
declare <32 x i8> @llvm.smax.v32i8(<32 x i8>, <32 x i8>)
|
||||
|
||||
define <4 x i64> @test_mm256_max_epi16(<4 x i64> %a0, <4 x i64> %a1) {
|
||||
; CHECK-LABEL: test_mm256_max_epi16:
|
||||
@ -1645,11 +1645,11 @@ define <4 x i64> @test_mm256_max_epi16(<4 x i64> %a0, <4 x i64> %a1) {
|
||||
; CHECK-NEXT: ret{{[l|q]}}
|
||||
%arg0 = bitcast <4 x i64> %a0 to <16 x i16>
|
||||
%arg1 = bitcast <4 x i64> %a1 to <16 x i16>
|
||||
%cmp = icmp sgt <16 x i16> %arg0, %arg1
|
||||
%sel = select <16 x i1> %cmp, <16 x i16> %arg0, <16 x i16> %arg1
|
||||
%sel = call <16 x i16> @llvm.smax.v16i16(<16 x i16> %arg0, <16 x i16> %arg1)
|
||||
%bc = bitcast <16 x i16> %sel to <4 x i64>
|
||||
ret <4 x i64> %bc
|
||||
}
|
||||
declare <16 x i16> @llvm.smax.v16i16(<16 x i16>, <16 x i16>)
|
||||
|
||||
define <4 x i64> @test_mm256_max_epi32(<4 x i64> %a0, <4 x i64> %a1) {
|
||||
; CHECK-LABEL: test_mm256_max_epi32:
|
||||
@ -1658,11 +1658,11 @@ define <4 x i64> @test_mm256_max_epi32(<4 x i64> %a0, <4 x i64> %a1) {
|
||||
; CHECK-NEXT: ret{{[l|q]}}
|
||||
%arg0 = bitcast <4 x i64> %a0 to <8 x i32>
|
||||
%arg1 = bitcast <4 x i64> %a1 to <8 x i32>
|
||||
%cmp = icmp sgt <8 x i32> %arg0, %arg1
|
||||
%sel = select <8 x i1> %cmp, <8 x i32> %arg0, <8 x i32> %arg1
|
||||
%sel = call <8 x i32> @llvm.smax.v8i32(<8 x i32> %arg0, <8 x i32> %arg1)
|
||||
%bc = bitcast <8 x i32> %sel to <4 x i64>
|
||||
ret <4 x i64> %bc
|
||||
}
|
||||
declare <8 x i32> @llvm.smax.v8i32(<8 x i32>, <8 x i32>)
|
||||
|
||||
define <4 x i64> @test_mm256_max_epu8(<4 x i64> %a0, <4 x i64> %a1) {
|
||||
; CHECK-LABEL: test_mm256_max_epu8:
|
||||
@ -1671,11 +1671,11 @@ define <4 x i64> @test_mm256_max_epu8(<4 x i64> %a0, <4 x i64> %a1) {
|
||||
; CHECK-NEXT: ret{{[l|q]}}
|
||||
%arg0 = bitcast <4 x i64> %a0 to <32 x i8>
|
||||
%arg1 = bitcast <4 x i64> %a1 to <32 x i8>
|
||||
%cmp = icmp ugt <32 x i8> %arg0, %arg1
|
||||
%sel = select <32 x i1> %cmp, <32 x i8> %arg0, <32 x i8> %arg1
|
||||
%sel = call <32 x i8> @llvm.umax.v32i8(<32 x i8> %arg0, <32 x i8> %arg1)
|
||||
%bc = bitcast <32 x i8> %sel to <4 x i64>
|
||||
ret <4 x i64> %bc
|
||||
}
|
||||
declare <32 x i8> @llvm.umax.v32i8(<32 x i8>, <32 x i8>)
|
||||
|
||||
define <4 x i64> @test_mm256_max_epu16(<4 x i64> %a0, <4 x i64> %a1) {
|
||||
; CHECK-LABEL: test_mm256_max_epu16:
|
||||
@ -1684,11 +1684,11 @@ define <4 x i64> @test_mm256_max_epu16(<4 x i64> %a0, <4 x i64> %a1) {
|
||||
; CHECK-NEXT: ret{{[l|q]}}
|
||||
%arg0 = bitcast <4 x i64> %a0 to <16 x i16>
|
||||
%arg1 = bitcast <4 x i64> %a1 to <16 x i16>
|
||||
%cmp = icmp ugt <16 x i16> %arg0, %arg1
|
||||
%sel = select <16 x i1> %cmp, <16 x i16> %arg0, <16 x i16> %arg1
|
||||
%sel = call <16 x i16> @llvm.umax.v16i16(<16 x i16> %arg0, <16 x i16> %arg1)
|
||||
%bc = bitcast <16 x i16> %sel to <4 x i64>
|
||||
ret <4 x i64> %bc
|
||||
}
|
||||
declare <16 x i16> @llvm.umax.v16i16(<16 x i16>, <16 x i16>)
|
||||
|
||||
define <4 x i64> @test_mm256_max_epu32(<4 x i64> %a0, <4 x i64> %a1) {
|
||||
; CHECK-LABEL: test_mm256_max_epu32:
|
||||
@ -1697,11 +1697,11 @@ define <4 x i64> @test_mm256_max_epu32(<4 x i64> %a0, <4 x i64> %a1) {
|
||||
; CHECK-NEXT: ret{{[l|q]}}
|
||||
%arg0 = bitcast <4 x i64> %a0 to <8 x i32>
|
||||
%arg1 = bitcast <4 x i64> %a1 to <8 x i32>
|
||||
%cmp = icmp ugt <8 x i32> %arg0, %arg1
|
||||
%sel = select <8 x i1> %cmp, <8 x i32> %arg0, <8 x i32> %arg1
|
||||
%sel = call <8 x i32> @llvm.umax.v8i32(<8 x i32> %arg0, <8 x i32> %arg1)
|
||||
%bc = bitcast <8 x i32> %sel to <4 x i64>
|
||||
ret <4 x i64> %bc
|
||||
}
|
||||
declare <8 x i32> @llvm.umax.v8i32(<8 x i32>, <8 x i32>)
|
||||
|
||||
define <4 x i64> @test_mm256_min_epi8(<4 x i64> %a0, <4 x i64> %a1) {
|
||||
; CHECK-LABEL: test_mm256_min_epi8:
|
||||
@ -1710,11 +1710,11 @@ define <4 x i64> @test_mm256_min_epi8(<4 x i64> %a0, <4 x i64> %a1) {
|
||||
; CHECK-NEXT: ret{{[l|q]}}
|
||||
%arg0 = bitcast <4 x i64> %a0 to <32 x i8>
|
||||
%arg1 = bitcast <4 x i64> %a1 to <32 x i8>
|
||||
%cmp = icmp slt <32 x i8> %arg0, %arg1
|
||||
%sel = select <32 x i1> %cmp, <32 x i8> %arg0, <32 x i8> %arg1
|
||||
%sel = call <32 x i8> @llvm.smin.v32i8(<32 x i8> %arg0, <32 x i8> %arg1)
|
||||
%bc = bitcast <32 x i8> %sel to <4 x i64>
|
||||
ret <4 x i64> %bc
|
||||
}
|
||||
declare <32 x i8> @llvm.smin.v32i8(<32 x i8>, <32 x i8>)
|
||||
|
||||
define <4 x i64> @test_mm256_min_epi16(<4 x i64> %a0, <4 x i64> %a1) {
|
||||
; CHECK-LABEL: test_mm256_min_epi16:
|
||||
@ -1723,11 +1723,11 @@ define <4 x i64> @test_mm256_min_epi16(<4 x i64> %a0, <4 x i64> %a1) {
|
||||
; CHECK-NEXT: ret{{[l|q]}}
|
||||
%arg0 = bitcast <4 x i64> %a0 to <16 x i16>
|
||||
%arg1 = bitcast <4 x i64> %a1 to <16 x i16>
|
||||
%cmp = icmp slt <16 x i16> %arg0, %arg1
|
||||
%sel = select <16 x i1> %cmp, <16 x i16> %arg0, <16 x i16> %arg1
|
||||
%sel = call <16 x i16> @llvm.smin.v16i16(<16 x i16> %arg0, <16 x i16> %arg1)
|
||||
%bc = bitcast <16 x i16> %sel to <4 x i64>
|
||||
ret <4 x i64> %bc
|
||||
}
|
||||
declare <16 x i16> @llvm.smin.v16i16(<16 x i16>, <16 x i16>)
|
||||
|
||||
define <4 x i64> @test_mm256_min_epi32(<4 x i64> %a0, <4 x i64> %a1) {
|
||||
; CHECK-LABEL: test_mm256_min_epi32:
|
||||
@ -1736,11 +1736,11 @@ define <4 x i64> @test_mm256_min_epi32(<4 x i64> %a0, <4 x i64> %a1) {
|
||||
; CHECK-NEXT: ret{{[l|q]}}
|
||||
%arg0 = bitcast <4 x i64> %a0 to <8 x i32>
|
||||
%arg1 = bitcast <4 x i64> %a1 to <8 x i32>
|
||||
%cmp = icmp slt <8 x i32> %arg0, %arg1
|
||||
%sel = select <8 x i1> %cmp, <8 x i32> %arg0, <8 x i32> %arg1
|
||||
%sel = call <8 x i32> @llvm.smin.v8i32(<8 x i32> %arg0, <8 x i32> %arg1)
|
||||
%bc = bitcast <8 x i32> %sel to <4 x i64>
|
||||
ret <4 x i64> %bc
|
||||
}
|
||||
declare <8 x i32> @llvm.smin.v8i32(<8 x i32>, <8 x i32>)
|
||||
|
||||
define <4 x i64> @test_mm256_min_epu8(<4 x i64> %a0, <4 x i64> %a1) {
|
||||
; CHECK-LABEL: test_mm256_min_epu8:
|
||||
@ -1749,11 +1749,11 @@ define <4 x i64> @test_mm256_min_epu8(<4 x i64> %a0, <4 x i64> %a1) {
|
||||
; CHECK-NEXT: ret{{[l|q]}}
|
||||
%arg0 = bitcast <4 x i64> %a0 to <32 x i8>
|
||||
%arg1 = bitcast <4 x i64> %a1 to <32 x i8>
|
||||
%cmp = icmp ult <32 x i8> %arg0, %arg1
|
||||
%sel = select <32 x i1> %cmp, <32 x i8> %arg0, <32 x i8> %arg1
|
||||
%sel = call <32 x i8> @llvm.umin.v32i8(<32 x i8> %arg0, <32 x i8> %arg1)
|
||||
%bc = bitcast <32 x i8> %sel to <4 x i64>
|
||||
ret <4 x i64> %bc
|
||||
}
|
||||
declare <32 x i8> @llvm.umin.v32i8(<32 x i8>, <32 x i8>)
|
||||
|
||||
define <4 x i64> @test_mm256_min_epu16(<4 x i64> %a0, <4 x i64> %a1) {
|
||||
; CHECK-LABEL: test_mm256_min_epu16:
|
||||
@ -1762,11 +1762,11 @@ define <4 x i64> @test_mm256_min_epu16(<4 x i64> %a0, <4 x i64> %a1) {
|
||||
; CHECK-NEXT: ret{{[l|q]}}
|
||||
%arg0 = bitcast <4 x i64> %a0 to <16 x i16>
|
||||
%arg1 = bitcast <4 x i64> %a1 to <16 x i16>
|
||||
%cmp = icmp ult <16 x i16> %arg0, %arg1
|
||||
%sel = select <16 x i1> %cmp, <16 x i16> %arg0, <16 x i16> %arg1
|
||||
%sel = call <16 x i16> @llvm.umin.v16i16(<16 x i16> %arg0, <16 x i16> %arg1)
|
||||
%bc = bitcast <16 x i16> %sel to <4 x i64>
|
||||
ret <4 x i64> %bc
|
||||
}
|
||||
declare <16 x i16> @llvm.umin.v16i16(<16 x i16>, <16 x i16>)
|
||||
|
||||
define <4 x i64> @test_mm256_min_epu32(<4 x i64> %a0, <4 x i64> %a1) {
|
||||
; CHECK-LABEL: test_mm256_min_epu32:
|
||||
@ -1775,11 +1775,11 @@ define <4 x i64> @test_mm256_min_epu32(<4 x i64> %a0, <4 x i64> %a1) {
|
||||
; CHECK-NEXT: ret{{[l|q]}}
|
||||
%arg0 = bitcast <4 x i64> %a0 to <8 x i32>
|
||||
%arg1 = bitcast <4 x i64> %a1 to <8 x i32>
|
||||
%cmp = icmp ult <8 x i32> %arg0, %arg1
|
||||
%sel = select <8 x i1> %cmp, <8 x i32> %arg0, <8 x i32> %arg1
|
||||
%sel = call <8 x i32> @llvm.umin.v8i32(<8 x i32> %arg0, <8 x i32> %arg1)
|
||||
%bc = bitcast <8 x i32> %sel to <4 x i64>
|
||||
ret <4 x i64> %bc
|
||||
}
|
||||
declare <8 x i32> @llvm.umin.v8i32(<8 x i32>, <8 x i32>)
|
||||
|
||||
define i32 @test_mm256_movemask_epi8(<4 x i64> %a0) nounwind {
|
||||
; CHECK-LABEL: test_mm256_movemask_epi8:
|
||||
|
@ -2510,11 +2510,11 @@ define <2 x i64> @test_mm_max_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
|
||||
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
||||
%arg0 = bitcast <2 x i64> %a0 to <8 x i16>
|
||||
%arg1 = bitcast <2 x i64> %a1 to <8 x i16>
|
||||
%cmp = icmp sgt <8 x i16> %arg0, %arg1
|
||||
%sel = select <8 x i1> %cmp, <8 x i16> %arg0, <8 x i16> %arg1
|
||||
%sel = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %arg0, <8 x i16> %arg1)
|
||||
%bc = bitcast <8 x i16> %sel to <2 x i64>
|
||||
ret <2 x i64> %bc
|
||||
}
|
||||
declare <8 x i16> @llvm.smax.v8i16(<8 x i16>, <8 x i16>)
|
||||
|
||||
define <2 x i64> @test_mm_max_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
|
||||
; SSE-LABEL: test_mm_max_epu8:
|
||||
@ -2533,11 +2533,11 @@ define <2 x i64> @test_mm_max_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
|
||||
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
||||
%arg0 = bitcast <2 x i64> %a0 to <16 x i8>
|
||||
%arg1 = bitcast <2 x i64> %a1 to <16 x i8>
|
||||
%cmp = icmp ugt <16 x i8> %arg0, %arg1
|
||||
%sel = select <16 x i1> %cmp, <16 x i8> %arg0, <16 x i8> %arg1
|
||||
%sel = call <16 x i8> @llvm.umax.v16i8(<16 x i8> %arg0, <16 x i8> %arg1)
|
||||
%bc = bitcast <16 x i8> %sel to <2 x i64>
|
||||
ret <2 x i64> %bc
|
||||
}
|
||||
declare <16 x i8> @llvm.umax.v16i8(<16 x i8>, <16 x i8>)
|
||||
|
||||
define <2 x double> @test_mm_max_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
|
||||
; SSE-LABEL: test_mm_max_pd:
|
||||
@ -2606,11 +2606,11 @@ define <2 x i64> @test_mm_min_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
|
||||
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
||||
%arg0 = bitcast <2 x i64> %a0 to <8 x i16>
|
||||
%arg1 = bitcast <2 x i64> %a1 to <8 x i16>
|
||||
%cmp = icmp slt <8 x i16> %arg0, %arg1
|
||||
%sel = select <8 x i1> %cmp, <8 x i16> %arg0, <8 x i16> %arg1
|
||||
%sel = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %arg0, <8 x i16> %arg1)
|
||||
%bc = bitcast <8 x i16> %sel to <2 x i64>
|
||||
ret <2 x i64> %bc
|
||||
}
|
||||
declare <8 x i16> @llvm.smin.v8i16(<8 x i16>, <8 x i16>)
|
||||
|
||||
define <2 x i64> @test_mm_min_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
|
||||
; SSE-LABEL: test_mm_min_epu8:
|
||||
@ -2629,11 +2629,11 @@ define <2 x i64> @test_mm_min_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
|
||||
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
||||
%arg0 = bitcast <2 x i64> %a0 to <16 x i8>
|
||||
%arg1 = bitcast <2 x i64> %a1 to <16 x i8>
|
||||
%cmp = icmp ult <16 x i8> %arg0, %arg1
|
||||
%sel = select <16 x i1> %cmp, <16 x i8> %arg0, <16 x i8> %arg1
|
||||
%sel = call <16 x i8> @llvm.umin.v16i8(<16 x i8> %arg0, <16 x i8> %arg1)
|
||||
%bc = bitcast <16 x i8> %sel to <2 x i64>
|
||||
ret <2 x i64> %bc
|
||||
}
|
||||
declare <16 x i8> @llvm.umin.v16i8(<16 x i8>, <16 x i8>)
|
||||
|
||||
define <2 x double> @test_mm_min_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
|
||||
; SSE-LABEL: test_mm_min_pd:
|
||||
|
@ -662,11 +662,11 @@ define <2 x i64> @test_mm_max_epi8(<2 x i64> %a0, <2 x i64> %a1) {
|
||||
; AVX-NEXT: ret{{[l|q]}}
|
||||
%arg0 = bitcast <2 x i64> %a0 to <16 x i8>
|
||||
%arg1 = bitcast <2 x i64> %a1 to <16 x i8>
|
||||
%cmp = icmp sgt <16 x i8> %arg0, %arg1
|
||||
%sel = select <16 x i1> %cmp, <16 x i8> %arg0, <16 x i8> %arg1
|
||||
%sel = call <16 x i8> @llvm.smax.v16i8(<16 x i8> %arg0, <16 x i8> %arg1)
|
||||
%bc = bitcast <16 x i8> %sel to <2 x i64>
|
||||
ret <2 x i64> %bc
|
||||
}
|
||||
declare <16 x i8> @llvm.smax.v16i8(<16 x i8>, <16 x i8>)
|
||||
|
||||
define <2 x i64> @test_mm_max_epi32(<2 x i64> %a0, <2 x i64> %a1) {
|
||||
; SSE-LABEL: test_mm_max_epi32:
|
||||
@ -680,11 +680,11 @@ define <2 x i64> @test_mm_max_epi32(<2 x i64> %a0, <2 x i64> %a1) {
|
||||
; AVX-NEXT: ret{{[l|q]}}
|
||||
%arg0 = bitcast <2 x i64> %a0 to <4 x i32>
|
||||
%arg1 = bitcast <2 x i64> %a1 to <4 x i32>
|
||||
%cmp = icmp sgt <4 x i32> %arg0, %arg1
|
||||
%sel = select <4 x i1> %cmp, <4 x i32> %arg0, <4 x i32> %arg1
|
||||
%sel = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %arg0, <4 x i32> %arg1)
|
||||
%bc = bitcast <4 x i32> %sel to <2 x i64>
|
||||
ret <2 x i64> %bc
|
||||
}
|
||||
declare <4 x i32> @llvm.smax.v4i32(<4 x i32>, <4 x i32>)
|
||||
|
||||
define <2 x i64> @test_mm_max_epu16(<2 x i64> %a0, <2 x i64> %a1) {
|
||||
; SSE-LABEL: test_mm_max_epu16:
|
||||
@ -698,11 +698,11 @@ define <2 x i64> @test_mm_max_epu16(<2 x i64> %a0, <2 x i64> %a1) {
|
||||
; AVX-NEXT: ret{{[l|q]}}
|
||||
%arg0 = bitcast <2 x i64> %a0 to <8 x i16>
|
||||
%arg1 = bitcast <2 x i64> %a1 to <8 x i16>
|
||||
%cmp = icmp ugt <8 x i16> %arg0, %arg1
|
||||
%sel = select <8 x i1> %cmp, <8 x i16> %arg0, <8 x i16> %arg1
|
||||
%sel = call <8 x i16> @llvm.umax.v8i16(<8 x i16> %arg0, <8 x i16> %arg1)
|
||||
%bc = bitcast <8 x i16> %sel to <2 x i64>
|
||||
ret <2 x i64> %bc
|
||||
}
|
||||
declare <8 x i16> @llvm.umax.v8i16(<8 x i16>, <8 x i16>)
|
||||
|
||||
define <2 x i64> @test_mm_max_epu32(<2 x i64> %a0, <2 x i64> %a1) {
|
||||
; SSE-LABEL: test_mm_max_epu32:
|
||||
@ -716,11 +716,11 @@ define <2 x i64> @test_mm_max_epu32(<2 x i64> %a0, <2 x i64> %a1) {
|
||||
; AVX-NEXT: ret{{[l|q]}}
|
||||
%arg0 = bitcast <2 x i64> %a0 to <4 x i32>
|
||||
%arg1 = bitcast <2 x i64> %a1 to <4 x i32>
|
||||
%cmp = icmp ugt <4 x i32> %arg0, %arg1
|
||||
%sel = select <4 x i1> %cmp, <4 x i32> %arg0, <4 x i32> %arg1
|
||||
%sel = call <4 x i32> @llvm.umax.v4i32(<4 x i32> %arg0, <4 x i32> %arg1)
|
||||
%bc = bitcast <4 x i32> %sel to <2 x i64>
|
||||
ret <2 x i64> %bc
|
||||
}
|
||||
declare <4 x i32> @llvm.umax.v4i32(<4 x i32>, <4 x i32>)
|
||||
|
||||
define <2 x i64> @test_mm_min_epi8(<2 x i64> %a0, <2 x i64> %a1) {
|
||||
; SSE-LABEL: test_mm_min_epi8:
|
||||
@ -734,11 +734,11 @@ define <2 x i64> @test_mm_min_epi8(<2 x i64> %a0, <2 x i64> %a1) {
|
||||
; AVX-NEXT: ret{{[l|q]}}
|
||||
%arg0 = bitcast <2 x i64> %a0 to <16 x i8>
|
||||
%arg1 = bitcast <2 x i64> %a1 to <16 x i8>
|
||||
%cmp = icmp slt <16 x i8> %arg0, %arg1
|
||||
%sel = select <16 x i1> %cmp, <16 x i8> %arg0, <16 x i8> %arg1
|
||||
%sel = call <16 x i8> @llvm.smin.v16i8(<16 x i8> %arg0, <16 x i8> %arg1)
|
||||
%bc = bitcast <16 x i8> %sel to <2 x i64>
|
||||
ret <2 x i64> %bc
|
||||
}
|
||||
declare <16 x i8> @llvm.smin.v16i8(<16 x i8>, <16 x i8>)
|
||||
|
||||
define <2 x i64> @test_mm_min_epi32(<2 x i64> %a0, <2 x i64> %a1) {
|
||||
; SSE-LABEL: test_mm_min_epi32:
|
||||
@ -752,11 +752,11 @@ define <2 x i64> @test_mm_min_epi32(<2 x i64> %a0, <2 x i64> %a1) {
|
||||
; AVX-NEXT: ret{{[l|q]}}
|
||||
%arg0 = bitcast <2 x i64> %a0 to <4 x i32>
|
||||
%arg1 = bitcast <2 x i64> %a1 to <4 x i32>
|
||||
%cmp = icmp slt <4 x i32> %arg0, %arg1
|
||||
%sel = select <4 x i1> %cmp, <4 x i32> %arg0, <4 x i32> %arg1
|
||||
%sel = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %arg0, <4 x i32> %arg1)
|
||||
%bc = bitcast <4 x i32> %sel to <2 x i64>
|
||||
ret <2 x i64> %bc
|
||||
}
|
||||
declare <4 x i32> @llvm.smin.v4i32(<4 x i32>, <4 x i32>)
|
||||
|
||||
define <2 x i64> @test_mm_min_epu16(<2 x i64> %a0, <2 x i64> %a1) {
|
||||
; SSE-LABEL: test_mm_min_epu16:
|
||||
@ -770,11 +770,11 @@ define <2 x i64> @test_mm_min_epu16(<2 x i64> %a0, <2 x i64> %a1) {
|
||||
; AVX-NEXT: ret{{[l|q]}}
|
||||
%arg0 = bitcast <2 x i64> %a0 to <8 x i16>
|
||||
%arg1 = bitcast <2 x i64> %a1 to <8 x i16>
|
||||
%cmp = icmp ult <8 x i16> %arg0, %arg1
|
||||
%sel = select <8 x i1> %cmp, <8 x i16> %arg0, <8 x i16> %arg1
|
||||
%sel = call <8 x i16> @llvm.umin.v8i16(<8 x i16> %arg0, <8 x i16> %arg1)
|
||||
%bc = bitcast <8 x i16> %sel to <2 x i64>
|
||||
ret <2 x i64> %bc
|
||||
}
|
||||
declare <8 x i16> @llvm.umin.v8i16(<8 x i16>, <8 x i16>)
|
||||
|
||||
define <2 x i64> @test_mm_min_epu32(<2 x i64> %a0, <2 x i64> %a1) {
|
||||
; SSE-LABEL: test_mm_min_epu32:
|
||||
@ -788,11 +788,11 @@ define <2 x i64> @test_mm_min_epu32(<2 x i64> %a0, <2 x i64> %a1) {
|
||||
; AVX-NEXT: ret{{[l|q]}}
|
||||
%arg0 = bitcast <2 x i64> %a0 to <4 x i32>
|
||||
%arg1 = bitcast <2 x i64> %a1 to <4 x i32>
|
||||
%cmp = icmp ult <4 x i32> %arg0, %arg1
|
||||
%sel = select <4 x i1> %cmp, <4 x i32> %arg0, <4 x i32> %arg1
|
||||
%sel = call <4 x i32> @llvm.umin.v4i32(<4 x i32> %arg0, <4 x i32> %arg1)
|
||||
%bc = bitcast <4 x i32> %sel to <2 x i64>
|
||||
ret <2 x i64> %bc
|
||||
}
|
||||
declare <4 x i32> @llvm.umin.v4i32(<4 x i32>, <4 x i32>)
|
||||
|
||||
define <2 x i64> @test_mm_minpos_epu16(<2 x i64> %a0) {
|
||||
; SSE-LABEL: test_mm_minpos_epu16:
|
||||
|
Loading…
Reference in New Issue
Block a user