mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 03:33:20 +01:00
[PATCH] Fix VGATHER* operand constraints
Add earlyclobber constraints to prevent an input register from being allocated as the output register because, according to the Intel spec [1], "If any pair of the index, mask, or destination registers are the same, this instruction results a UD fault." --- [1] http://software.intel.com/sites/default/files/319433-014.pdf llvm-svn: 183327
This commit is contained in:
parent
0070e26a1c
commit
d464e3d65b
@ -2041,6 +2041,8 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
|
||||
case Intrinsic::x86_avx2_gather_d_d_256:
|
||||
case Intrinsic::x86_avx2_gather_q_d:
|
||||
case Intrinsic::x86_avx2_gather_q_d_256: {
|
||||
if (!Subtarget->hasAVX2())
|
||||
break;
|
||||
unsigned Opc;
|
||||
switch (IntNo) {
|
||||
default: llvm_unreachable("Impossible intrinsic");
|
||||
|
@ -8372,7 +8372,9 @@ multiclass avx2_gather<bits<8> opc, string OpcodeStr, RegisterClass RC256,
|
||||
[]>, VEX_4VOp3, VEX_L;
|
||||
}
|
||||
|
||||
let mayLoad = 1, Constraints = "$src1 = $dst, $mask = $mask_wb" in {
|
||||
let mayLoad = 1, Constraints
|
||||
= "@earlyclobber $dst,@earlyclobber $mask_wb, $src1 = $dst, $mask = $mask_wb"
|
||||
in {
|
||||
defm VGATHERDPD : avx2_gather<0x92, "vgatherdpd", VR256, vx64mem, vx64mem>, VEX_W;
|
||||
defm VGATHERQPD : avx2_gather<0x93, "vgatherqpd", VR256, vx64mem, vy64mem>, VEX_W;
|
||||
defm VGATHERDPS : avx2_gather<0x92, "vgatherdps", VR256, vx32mem, vy32mem>;
|
||||
|
18
test/CodeGen/X86/avx2-gather.ll
Normal file
18
test/CodeGen/X86/avx2-gather.ll
Normal file
@ -0,0 +1,18 @@
|
||||
; RUN: not llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 | FileCheck %s
|
||||
|
||||
declare <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float>, i8*,
|
||||
<4 x i32>, <4 x float>, i8) nounwind readonly
|
||||
|
||||
define <4 x float> @test_x86_avx2_gather_d_ps(i8* %a1,
|
||||
<4 x i32> %idx, <4 x float> %mask) {
|
||||
%res = call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> undef,
|
||||
i8* %a1, <4 x i32> %idx, <4 x float> %mask, i8 2) ;
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
; CHECK: test_x86_avx2_gather_d_ps
|
||||
; CHECK: vgatherdps
|
||||
; CHECK-NOT: [[DST]]
|
||||
; CHECK: [[DST:%xmm[0-9]+]]{{$}}
|
||||
; CHECK: ret
|
Loading…
Reference in New Issue
Block a user