1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-20 19:42:54 +02:00

Add AVX SSE4.1 insertps, ptest and movntdqa instructions

llvm-svn: 107747
This commit is contained in:
Bruno Cardoso Lopes 2010-07-07 01:14:56 +00:00
parent fa10461265
commit 675ebe2dc0
3 changed files with 79 additions and 18 deletions

View File

@ -4602,33 +4602,49 @@ let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in
// are optimized inserts that won't zero arbitrary elements in the destination
// vector. The next one matches the intrinsic and could zero arbitrary elements
// in the target vector.
let Constraints = "$src1 = $dst" in {
multiclass SS41I_insertf32<bits<8> opc, string OpcodeStr> {
def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set VR128:$dst,
(X86insrtps VR128:$src1, VR128:$src2, imm:$src3))]>,
multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1> {
// Register form: $src2 is a VR128 register and $src3 is the 8-bit immediate.
// The selection pattern matches the X86insrtps node on ($src1, $src2, $src3).
def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
// Is2Addr picks the assembly string: when set, $src1 is not printed
// (two-operand syntax); when clear, $src1 is printed as a separate operand
// (three-operand syntax).
!if(Is2Addr,
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set VR128:$dst,
(X86insrtps VR128:$src1, VR128:$src2, imm:$src3))]>,
OpSize;
def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, f32mem:$src2, i32i8imm:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set VR128:$dst,
(X86insrtps VR128:$src1,
(v4f32 (scalar_to_vector (loadf32 addr:$src2))),
imm:$src3))]>, OpSize;
}
// Memory form: $src2 is an f32mem operand. The pattern loads the scalar with
// loadf32, wraps it as a v4f32 via scalar_to_vector, and feeds it to
// X86insrtps together with $src1 and the immediate $src3.
def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, f32mem:$src2, i32i8imm:$src3),
// Same Is2Addr choice as the register form: two-operand syntax when set,
// three-operand syntax (with $src1 printed) when clear.
!if(Is2Addr,
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set VR128:$dst,
(X86insrtps VR128:$src1,
(v4f32 (scalar_to_vector (loadf32 addr:$src2))),
imm:$src3))]>, OpSize;
}
defm INSERTPS : SS41I_insertf32<0x21, "insertps">;
// SSE4.1 form: uses the multiclass default Is2Addr = 1 (two-operand assembly
// syntax) and ties $src1 to $dst through the constraint.
let Constraints = "$src1 = $dst" in
defm INSERTPS : SS41I_insertf32<0x21, "insertps">;
// AVX form: passes Is2Addr = 0 so $src1 is printed as its own operand, and
// adds VEX_4V. No "$src1 = $dst" constraint is applied here.
// NOTE(review): isAsmParserOnly = 1 presumably restricts these definitions to
// the assembler for now -- confirm.
let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in
defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>, VEX_4V;
// Select the int_x86_sse41_insertps intrinsic to the register-register
// SSE form (INSERTPSrr).
def : Pat<(int_x86_sse41_insertps VR128:$src1, VR128:$src2, imm:$src3),
(INSERTPSrr VR128:$src1, VR128:$src2, imm:$src3)>;
// ptest instruction: X86ISelLowering lowers to this, primarily from the
// Intel intrinsic that corresponds to it.
// VEX-encoded ptest (opcode 0x17). Both forms have no register outputs; they
// are declared as defining EFLAGS, and their patterns set EFLAGS from the
// X86ptest node.
// NOTE(review): isAsmParserOnly = 1 presumably restricts these definitions to
// the assembler for now -- confirm.
let Defs = [EFLAGS], isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in {
// Register-register form: both operands are VR128 registers.
def VPTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
"vptest\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86ptest VR128:$src1, VR128:$src2))]>,
OpSize, VEX;
// Register-memory form: $src2 is an i128mem operand that the pattern loads.
def VPTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2),
"vptest\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86ptest VR128:$src1, (load addr:$src2)))]>,
OpSize, VEX;
}
let Defs = [EFLAGS] in {
def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
"ptest \t{$src2, $src1|$src1, $src2}",
@ -4640,6 +4656,11 @@ def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2),
OpSize;
}
// VEX-encoded movntdqa (opcode 0x2A): takes an i128mem source and writes a
// VR128 destination. The pattern matches the int_x86_sse41_movntdqa intrinsic
// on the source address.
// NOTE(review): isAsmParserOnly = 1 presumably restricts this definition to
// the assembler for now -- confirm.
let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in
def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"vmovntdqa\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>,
OpSize, VEX;
def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"movntdqa\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>,

View File

@ -12102,3 +12102,23 @@
// CHECK: encoding: [0xc4,0xe3,0x69,0x22,0x28,0x07]
vpinsrd $7, (%eax), %xmm2, %xmm5
// AVX forms of the SSE4.1 insertps, ptest and movntdqa instructions, using
// low XMM registers and (%eax) memory operands. Each case checks the printed
// instruction and its encoded bytes.
// CHECK: vinsertps $7, %xmm2, %xmm5, %xmm1
// CHECK: encoding: [0xc4,0xe3,0x51,0x21,0xca,0x07]
vinsertps $7, %xmm2, %xmm5, %xmm1
// CHECK: vinsertps $7, (%eax), %xmm5, %xmm1
// CHECK: encoding: [0xc4,0xe3,0x51,0x21,0x08,0x07]
vinsertps $7, (%eax), %xmm5, %xmm1
// CHECK: vptest %xmm2, %xmm5
// CHECK: encoding: [0xc4,0xe2,0x79,0x17,0xea]
vptest %xmm2, %xmm5
// CHECK: vptest (%eax), %xmm2
// CHECK: encoding: [0xc4,0xe2,0x79,0x17,0x10]
vptest (%eax), %xmm2
// CHECK: vmovntdqa (%eax), %xmm2
// CHECK: encoding: [0xc4,0xe2,0x79,0x2a,0x10]
vmovntdqa (%eax), %xmm2

View File

@ -2166,3 +2166,23 @@ pshufb CPI1_0(%rip), %xmm1
// CHECK: encoding: [0xc4,0x63,0x99,0x22,0x10,0x07]
vpinsrq $7, (%rax), %xmm12, %xmm10
// AVX forms of the SSE4.1 insertps, ptest and movntdqa instructions, using
// the extended registers xmm10-xmm12 and (%rax) memory operands. Each case
// checks the printed instruction and its encoded bytes.
// CHECK: vinsertps $7, %xmm12, %xmm10, %xmm11
// CHECK: encoding: [0xc4,0x43,0x29,0x21,0xdc,0x07]
vinsertps $7, %xmm12, %xmm10, %xmm11
// CHECK: vinsertps $7, (%rax), %xmm10, %xmm11
// CHECK: encoding: [0xc4,0x63,0x29,0x21,0x18,0x07]
vinsertps $7, (%rax), %xmm10, %xmm11
// CHECK: vptest %xmm12, %xmm10
// CHECK: encoding: [0xc4,0x42,0x79,0x17,0xd4]
vptest %xmm12, %xmm10
// CHECK: vptest (%rax), %xmm12
// CHECK: encoding: [0xc4,0x62,0x79,0x17,0x20]
vptest (%rax), %xmm12
// CHECK: vmovntdqa (%rax), %xmm12
// CHECK: encoding: [0xc4,0x62,0x79,0x2a,0x20]
vmovntdqa (%rax), %xmm12