[AVX-512] Don't let ExeDependencyFix pass convert VPANDD/Q to VPANDPS/PD unless DQI instructions are supported. Same for ANDN, OR, and XOR.
Thanks to Igor Breger for pointing out my mistake.

llvm-svn: 277292

commit 01f8dc5886 (parent ab016f68fc)
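For context: plain AVX-512F only provides the VPANDD/VPANDQ family for 512-bit logic, while the EVEX-encoded floating-point forms (VANDPS/PD, VANDNPS/PD, VORPS/PD, VXORPS/PD) require AVX-512DQ, which is why the conversion has to be gated on DQI. Below is a minimal standalone sketch of the validDomains computation this change produces; the function name and the two boolean parameters are illustrative stand-ins for the table lookups in the real X86InstrInfo code shown in the diff.

#include <cstdint>
#include <utility>

// Sketch only. Domains: 1 = PackedSingle, 2 = PackedDouble, 3 = PackedInt;
// bit N of the returned mask marks domain N as a legal encoding.
std::pair<uint16_t, uint16_t>
computeValidDomains(unsigned domain, bool inAVX512MoveTable,
                    bool inAVX512DQLogicTable, bool hasDQI) {
  uint16_t validDomains = 0;
  if (domain && inAVX512MoveTable)
    validDomains = 0xe;                // Moves: PS, PD and PackedInt all legal.
  else if (domain && inAVX512DQLogicTable)
    validDomains = hasDQI ? 0xe : 0x8; // 0x8 = PackedInt only without DQI.
  return std::make_pair(domain, validDomains);
}

Leaving only the PackedInt bit set is what stops the execution dependency fix pass from rewriting a VPANDQ into a VANDPS on an AVX-512F-only target.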
@@ -7349,7 +7349,17 @@ static const uint16_t ReplaceableInstrsAVX2[][3] = {
static const uint16_t ReplaceableInstrsAVX512[][4] = {
// Two integer columns for 64-bit and 32-bit elements.
//PackedSingle PackedDouble PackedInt PackedInt
//PackedSingle PackedDouble PackedInt PackedInt
{ X86::VMOVAPSZmr, X86::VMOVAPDZmr, X86::VMOVDQA64Zmr, X86::VMOVDQA64Zmr },
{ X86::VMOVAPSZrm, X86::VMOVAPDZrm, X86::VMOVDQA64Zrm, X86::VMOVDQA64Zrm },
{ X86::VMOVAPSZrr, X86::VMOVAPDZrr, X86::VMOVDQA64Zrr, X86::VMOVDQA64Zrr },
{ X86::VMOVUPSZmr, X86::VMOVUPDZmr, X86::VMOVDQU64Zmr, X86::VMOVDQU64Zmr },
{ X86::VMOVUPSZrm, X86::VMOVUPDZrm, X86::VMOVDQU64Zrm, X86::VMOVDQU64Zrm },
};

static const uint16_t ReplaceableInstrsAVX512DQ[][4] = {
// Two integer columns for 64-bit and 32-bit elements.
//PackedSingle PackedDouble PackedInt PackedInt
{ X86::VANDNPSZ128rm, X86::VANDNPDZ128rm, X86::VPANDNQZ128rm, X86::VPANDNDZ128rm },
{ X86::VANDNPSZ128rr, X86::VANDNPDZ128rr, X86::VPANDNQZ128rr, X86::VPANDNDZ128rr },
{ X86::VANDPSZ128rm, X86::VANDPDZ128rm, X86::VPANDQZ128rm, X86::VPANDDZ128rm },
@@ -7374,11 +7384,6 @@ static const uint16_t ReplaceableInstrsAVX512[][4] = {
{ X86::VORPSZrr, X86::VORPDZrr, X86::VPORQZrr, X86::VPORDZrr },
{ X86::VXORPSZrm, X86::VXORPDZrm, X86::VPXORQZrm, X86::VPXORDZrm },
{ X86::VXORPSZrr, X86::VXORPDZrr, X86::VPXORQZrr, X86::VPXORDZrr },
{ X86::VMOVAPSZmr, X86::VMOVAPDZmr, X86::VMOVDQA64Zmr, X86::VMOVDQA64Zmr },
{ X86::VMOVAPSZrm, X86::VMOVAPDZrm, X86::VMOVDQA64Zrm, X86::VMOVDQA64Zrm },
{ X86::VMOVAPSZrr, X86::VMOVAPDZrr, X86::VMOVDQA64Zrr, X86::VMOVDQA64Zrr },
{ X86::VMOVUPSZmr, X86::VMOVUPDZmr, X86::VMOVDQU64Zmr, X86::VMOVDQU64Zmr },
{ X86::VMOVUPSZrm, X86::VMOVUPDZrm, X86::VMOVDQU64Zrm, X86::VMOVDQU64Zrm },
};

// FIXME: Some shuffle and unpack instructions have equivalents in different
@@ -7406,17 +7411,26 @@ static const uint16_t *lookupAVX512(unsigned opcode, unsigned domain) {
return nullptr;
}

static const uint16_t *lookupAVX512DQ(unsigned opcode, unsigned domain) {
// If this is the integer domain make sure to check both integer columns.
for (const uint16_t (&Row)[4] : ReplaceableInstrsAVX512DQ)
if (Row[domain-1] == opcode || (domain == 3 && Row[3] == opcode))
return Row;
return nullptr;
}

std::pair<uint16_t, uint16_t>
X86InstrInfo::getExecutionDomain(const MachineInstr &MI) const {
uint16_t domain = (MI.getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
bool hasAVX2 = Subtarget.hasAVX2();
uint16_t validDomains = 0;
if (domain && lookup(MI.getOpcode(), domain))
validDomains = 0xe;
else if (domain && lookupAVX2(MI.getOpcode(), domain))
validDomains = hasAVX2 ? 0xe : 0x6;
validDomains = Subtarget.hasAVX2() ? 0xe : 0x6;
else if (domain && lookupAVX512(MI.getOpcode(), domain))
validDomains = 0xe;
else if (domain && lookupAVX512DQ(MI.getOpcode(), domain))
validDomains = Subtarget.hasDQI() ? 0xe : 0x8;
return std::make_pair(domain, validDomains);
}

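Since the tables above carry two integer columns (the Q form and the D form), an integer-domain lookup has to accept a match in either column, which is what lookupAVX512DQ does. Here is a self-contained toy version of that lookup, using made-up opcode values rather than the real X86:: enumerators:

#include <cstdint>

// Made-up stand-ins for X86::VANDPSZrr and friends; values are arbitrary.
enum : uint16_t { ANDPS = 1, ANDPD = 2, PANDQ = 3, PANDD = 4 };

static const uint16_t ToyTable[][4] = {
  // PackedSingle  PackedDouble  PackedInt (Q)  PackedInt (D)
  {  ANDPS,        ANDPD,        PANDQ,         PANDD },
};

// domain is 1-based; for the integer domain (3) check both integer columns.
static const uint16_t *lookupToy(uint16_t opcode, unsigned domain) {
  for (const uint16_t (&Row)[4] : ToyTable)
    if (Row[domain - 1] == opcode || (domain == 3 && Row[3] == opcode))
      return Row;
  return nullptr;
}

With this shape, lookupToy(PANDD, 3) still finds the row even though PANDD sits in the fourth column.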
@@ -7431,9 +7445,17 @@ void X86InstrInfo::setExecutionDomain(MachineInstr &MI, unsigned Domain) const {
table = lookupAVX2(MI.getOpcode(), dom);
}
if (!table) { // try the AVX512 table
assert(Subtarget.hasAVX512() && "Requires AVX-512");
table = lookupAVX512(MI.getOpcode(), dom);
// Don't change integer Q instructions to D instructions.
if (dom == 3 && table[3] == MI.getOpcode())
if (table && dom == 3 && table[3] == MI.getOpcode())
Domain = 4;
}
if (!table) { // try the AVX512DQ table
assert((Subtarget.hasDQI() || Domain >=3) && "Requires AVX-512DQ");
table = lookupAVX512DQ(MI.getOpcode(), dom);
// Don't change integer Q instructions to D instructions.
if (table && dom == 3 && table[3] == MI.getOpcode())
Domain = 4;
}
assert(table && "Cannot change domain");
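A last hedged sketch of the column-selection guard above: when the instruction's current opcode already matches the second integer column, the requested domain is bumped from 3 to 4 so that indexing the row with Domain - 1 hands back the instruction's own opcode instead of the other integer form. The indexing step itself is not shown in this hunk, and the parameter names below are illustrative only.

#include <cassert>
#include <cstdint>

// Pick the replacement opcode for the requested domain from one table row,
// without swapping an instruction between the two integer columns.
uint16_t selectOpcode(const uint16_t (&Row)[4], uint16_t CurOpcode,
                      unsigned CurDomain, unsigned WantedDomain) {
  // Mirrors the guard above: if the current opcode is the second integer
  // column's entry, select that same column again.
  if (CurDomain == 3 && Row[3] == CurOpcode)
    WantedDomain = 4;
  assert(WantedDomain >= 1 && WantedDomain <= 4 && "Invalid execution domain");
  return Row[WantedDomain - 1];
}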
@@ -962,10 +962,30 @@ define <8 x float> @test_fxor_8f32(<8 x float> %a) {
}

define <8 x double> @fabs_v8f64(<8 x double> %p)
; CHECK-LABEL: fabs_v8f64:
; CHECK: ## BB#0:
; CHECK-NEXT: vandps {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: retq
; AVX512F-LABEL: fabs_v8f64:
; AVX512F: ## BB#0:
; AVX512F-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: fabs_v8f64:
; AVX512VL: ## BB#0:
; AVX512VL-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: fabs_v8f64:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: fabs_v8f64:
; AVX512DQ: ## BB#0:
; AVX512DQ-NEXT: vandps {{.*}}(%rip), %zmm0, %zmm0
; AVX512DQ-NEXT: retq
;
; SKX-LABEL: fabs_v8f64:
; SKX: ## BB#0:
; SKX-NEXT: vandps {{.*}}(%rip), %zmm0, %zmm0
; SKX-NEXT: retq
{
%t = call <8 x double> @llvm.fabs.v8f64(<8 x double> %p)
ret <8 x double> %t
@@ -1025,7 +1025,7 @@ declare <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32>, <16 x i32>, <16
define <8 x i64> @test_xor_epi64(<8 x i64> %a, <8 x i64> %b) {
; CHECK-LABEL: test_xor_epi64:
; CHECK: ## BB#0:
; CHECK-NEXT: vxorps %zmm1, %zmm0, %zmm0
; CHECK-NEXT: vpxorq %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64> %a,<8 x i64> %b, <8 x i64>zeroinitializer, i8 -1)
ret < 8 x i64> %res
@@ -1047,7 +1047,7 @@ declare <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64>, <8 x i64>, <8 x i6
define <8 x i64> @test_or_epi64(<8 x i64> %a, <8 x i64> %b) {
; CHECK-LABEL: test_or_epi64:
; CHECK: ## BB#0:
; CHECK-NEXT: vorps %zmm1, %zmm0, %zmm0
; CHECK-NEXT: vporq %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64> %a,<8 x i64> %b, <8 x i64>zeroinitializer, i8 -1)
ret < 8 x i64> %res
@@ -1069,7 +1069,7 @@ declare <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64>, <8 x i64>, <8 x i64
define <8 x i64> @test_and_epi64(<8 x i64> %a, <8 x i64> %b) {
; CHECK-LABEL: test_and_epi64:
; CHECK: ## BB#0:
; CHECK-NEXT: vandps %zmm1, %zmm0, %zmm0
; CHECK-NEXT: vpandq %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64> %a,<8 x i64> %b, <8 x i64>zeroinitializer, i8 -1)
ret < 8 x i64> %res
@@ -8,7 +8,7 @@ define <16 x i32> @select00(i32 %a, <16 x i32> %b) nounwind {
; CHECK-NEXT: cmpl $255, %edi
; CHECK-NEXT: je LBB0_2
; CHECK-NEXT: ## BB#1:
; CHECK-NEXT: vmovaps %zmm0, %zmm1
; CHECK-NEXT: vmovdqa64 %zmm0, %zmm1
; CHECK-NEXT: LBB0_2:
; CHECK-NEXT: vpxord %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
@@ -25,9 +25,9 @@ define <8 x i64> @select01(i32 %a, <8 x i64> %b) nounwind {
; CHECK-NEXT: cmpl $255, %edi
; CHECK-NEXT: je LBB1_2
; CHECK-NEXT: ## BB#1:
; CHECK-NEXT: vmovaps %zmm0, %zmm1
; CHECK-NEXT: vmovdqa64 %zmm0, %zmm1
; CHECK-NEXT: LBB1_2:
; CHECK-NEXT: vxorps %zmm1, %zmm0, %zmm0
; CHECK-NEXT: vpxorq %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
%cmpres = icmp eq i32 %a, 255
%selres = select i1 %cmpres, <8 x i64> zeroinitializer, <8 x i64> %b
@@ -2330,7 +2330,7 @@ declare <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32>, <8 x i32>, <8 x i
define <2 x i64> @test_mask_andnot_epi64_rr_128(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_mask_andnot_epi64_rr_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vandnps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x55,0xc1]
; CHECK-NEXT: vpandnq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xdf,0xc1]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1)
ret <2 x i64> %res
@@ -2360,7 +2360,7 @@ define <2 x i64> @test_mask_andnot_epi64_rrkz_128(<2 x i64> %a, <2 x i64> %b, i8
define <2 x i64> @test_mask_andnot_epi64_rm_128(<2 x i64> %a, <2 x i64>* %ptr_b) {
; CHECK-LABEL: test_mask_andnot_epi64_rm_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vandnps (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x55,0x07]
; CHECK-NEXT: vpandnq (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xdf,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%b = load <2 x i64>, <2 x i64>* %ptr_b
%res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1)
@@ -2434,7 +2434,7 @@ declare <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64>, <2 x i64>, <2 x i
define <4 x i64> @test_mask_andnot_epi64_rr_256(<4 x i64> %a, <4 x i64> %b) {
; CHECK-LABEL: test_mask_andnot_epi64_rr_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vandnps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x55,0xc1]
; CHECK-NEXT: vpandnq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xdf,0xc1]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1)
ret <4 x i64> %res
@@ -2464,7 +2464,7 @@ define <4 x i64> @test_mask_andnot_epi64_rrkz_256(<4 x i64> %a, <4 x i64> %b, i8
define <4 x i64> @test_mask_andnot_epi64_rm_256(<4 x i64> %a, <4 x i64>* %ptr_b) {
; CHECK-LABEL: test_mask_andnot_epi64_rm_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vandnps (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x55,0x07]
; CHECK-NEXT: vpandnq (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xdf,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%b = load <4 x i64>, <4 x i64>* %ptr_b
%res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1)
@@ -117,7 +117,7 @@ define void @test_zero_v4f32(<4 x float>* %dst) {
; VLX-LABEL: test_zero_v4f32:
; VLX: # BB#0:
; VLX-NEXT: vpxord %xmm0, %xmm0, %xmm0
; VLX-NEXT: vmovntps %xmm0, (%rdi)
; VLX-NEXT: vmovntdq %xmm0, (%rdi)
; VLX-NEXT: retq
store <4 x float> zeroinitializer, <4 x float>* %dst, align 16, !nontemporal !1
ret void
@@ -139,7 +139,7 @@ define void @test_zero_v4i32(<4 x i32>* %dst) {
; VLX-LABEL: test_zero_v4i32:
; VLX: # BB#0:
; VLX-NEXT: vpxord %xmm0, %xmm0, %xmm0
; VLX-NEXT: vmovntps %xmm0, (%rdi)
; VLX-NEXT: vmovntdq %xmm0, (%rdi)
; VLX-NEXT: retq
store <4 x i32> zeroinitializer, <4 x i32>* %dst, align 16, !nontemporal !1
@@ -162,7 +162,7 @@ define void @test_zero_v2f64(<2 x double>* %dst) {
; VLX-LABEL: test_zero_v2f64:
; VLX: # BB#0:
; VLX-NEXT: vpxord %xmm0, %xmm0, %xmm0
; VLX-NEXT: vmovntps %xmm0, (%rdi)
; VLX-NEXT: vmovntdq %xmm0, (%rdi)
; VLX-NEXT: retq
store <2 x double> zeroinitializer, <2 x double>* %dst, align 16, !nontemporal !1
ret void
@@ -184,7 +184,7 @@ define void @test_zero_v2i64(<2 x i64>* %dst) {
; VLX-LABEL: test_zero_v2i64:
; VLX: # BB#0:
; VLX-NEXT: vpxord %xmm0, %xmm0, %xmm0
; VLX-NEXT: vmovntps %xmm0, (%rdi)
; VLX-NEXT: vmovntdq %xmm0, (%rdi)
; VLX-NEXT: retq
store <2 x i64> zeroinitializer, <2 x i64>* %dst, align 16, !nontemporal !1
ret void
@@ -206,7 +206,7 @@ define void @test_zero_v8i16(<8 x i16>* %dst) {
; VLX-LABEL: test_zero_v8i16:
; VLX: # BB#0:
; VLX-NEXT: vpxord %xmm0, %xmm0, %xmm0
; VLX-NEXT: vmovntps %xmm0, (%rdi)
; VLX-NEXT: vmovntdq %xmm0, (%rdi)
; VLX-NEXT: retq
store <8 x i16> zeroinitializer, <8 x i16>* %dst, align 16, !nontemporal !1
ret void
@@ -228,7 +228,7 @@ define void @test_zero_v16i8(<16 x i8>* %dst) {
; VLX-LABEL: test_zero_v16i8:
; VLX: # BB#0:
; VLX-NEXT: vpxord %xmm0, %xmm0, %xmm0
; VLX-NEXT: vmovntps %xmm0, (%rdi)
; VLX-NEXT: vmovntdq %xmm0, (%rdi)
; VLX-NEXT: retq
store <16 x i8> zeroinitializer, <16 x i8>* %dst, align 16, !nontemporal !1
ret void
@@ -76,7 +76,7 @@ declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>) nounwind read

define <2 x double> @stack_fold_andpd(<2 x double> %a0, <2 x double> %a1) {
;CHECK-LABEL: stack_fold_andpd
;CHECK: vandpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
;CHECK: vpandq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = bitcast <2 x double> %a0 to <2 x i64>
%3 = bitcast <2 x double> %a1 to <2 x i64>
@@ -89,7 +89,7 @@ define <2 x double> @stack_fold_andpd(<2 x double> %a0, <2 x double> %a1) {

define <4 x double> @stack_fold_andpd_ymm(<4 x double> %a0, <4 x double> %a1) {
;CHECK-LABEL: stack_fold_andpd_ymm
;CHECK: vandpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
;CHECK: vpandq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = bitcast <4 x double> %a0 to <4 x i64>
%3 = bitcast <4 x double> %a1 to <4 x i64>
@@ -198,7 +198,7 @@ declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>) nounwind read

define <2 x double> @stack_fold_orpd(<2 x double> %a0, <2 x double> %a1) {
;CHECK-LABEL: stack_fold_orpd
;CHECK: vorpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
;CHECK: vporq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = bitcast <2 x double> %a0 to <2 x i64>
%3 = bitcast <2 x double> %a1 to <2 x i64>
@@ -211,7 +211,7 @@ define <2 x double> @stack_fold_orpd(<2 x double> %a0, <2 x double> %a1) {

define <4 x double> @stack_fold_orpd_ymm(<4 x double> %a0, <4 x double> %a1) {
;CHECK-LABEL: stack_fold_orpd_ymm
;CHECK: vorpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
;CHECK: vporq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = bitcast <4 x double> %a0 to <4 x i64>
%3 = bitcast <4 x double> %a1 to <4 x i64>
@@ -316,7 +316,7 @@ declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>) nounwind read

define <2 x double> @stack_fold_xorpd(<2 x double> %a0, <2 x double> %a1) {
;CHECK-LABEL: stack_fold_xorpd
;CHECK: vxorpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
;CHECK: vpxorq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = bitcast <2 x double> %a0 to <2 x i64>
%3 = bitcast <2 x double> %a1 to <2 x i64>
@@ -329,7 +329,7 @@ define <2 x double> @stack_fold_xorpd(<2 x double> %a0, <2 x double> %a1) {

define <4 x double> @stack_fold_xorpd_ymm(<4 x double> %a0, <4 x double> %a1) {
;CHECK-LABEL: stack_fold_xorpd_ymm
;CHECK: vxorpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
;CHECK: vpxorq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = bitcast <4 x double> %a0 to <4 x i64>
%3 = bitcast <4 x double> %a1 to <4 x i64>