mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 04:02:41 +01:00
[x86] Give movss and movsd execution domains in the x86 backend.
This associates movss and movsd with the packed single and packed double execution domains, respectively. While this is largely cosmetic, as we no longer have weird ping-ponging between single and double precision, it is also useful because it prevents the domain fixing algorithm from seeing domain breaks that don't actually exist. It will also be much more important if we have an execution domain default other than packed single, as that would cause us to mix movss and movsd with integer vector code on a regular basis, a very bad mixture. llvm-svn: 228135
This commit is contained in:
parent
7381b41ffe
commit
99f7e3a3dd
@ -422,8 +422,9 @@ class Iseg32 <bits<8> o, Format f, dag outs, dag ins, string asm,
|
||||
|
||||
// SI - SSE 1 & 2 scalar instructions
|
||||
class SI<bits<8> o, Format F, dag outs, dag ins, string asm,
|
||||
list<dag> pattern, InstrItinClass itin = NoItinerary>
|
||||
: I<o, F, outs, ins, asm, pattern, itin> {
|
||||
list<dag> pattern, InstrItinClass itin = NoItinerary,
|
||||
Domain d = GenericDomain>
|
||||
: I<o, F, outs, ins, asm, pattern, itin, d> {
|
||||
let Predicates = !if(!eq(OpEnc.Value, EncEVEX.Value), [HasAVX512],
|
||||
!if(!eq(OpEnc.Value, EncVEX.Value), [UseAVX],
|
||||
!if(!eq(OpPrefix.Value, XS.Value), [UseSSE1],
|
||||
|
@ -548,13 +548,13 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
|
||||
|
||||
multiclass sse12_move_rr<RegisterClass RC, SDNode OpNode, ValueType vt,
|
||||
X86MemOperand x86memop, string base_opc,
|
||||
string asm_opr> {
|
||||
string asm_opr, Domain d = GenericDomain> {
|
||||
def rr : SI<0x10, MRMSrcReg, (outs VR128:$dst),
|
||||
(ins VR128:$src1, RC:$src2),
|
||||
!strconcat(base_opc, asm_opr),
|
||||
[(set VR128:$dst, (vt (OpNode VR128:$src1,
|
||||
(scalar_to_vector RC:$src2))))],
|
||||
IIC_SSE_MOV_S_RR>, Sched<[WriteFShuffle]>;
|
||||
IIC_SSE_MOV_S_RR, d>, Sched<[WriteFShuffle]>;
|
||||
|
||||
// For the disassembler
|
||||
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
|
||||
@ -565,49 +565,55 @@ multiclass sse12_move_rr<RegisterClass RC, SDNode OpNode, ValueType vt,
|
||||
}
|
||||
|
||||
multiclass sse12_move<RegisterClass RC, SDNode OpNode, ValueType vt,
|
||||
X86MemOperand x86memop, string OpcodeStr> {
|
||||
X86MemOperand x86memop, string OpcodeStr,
|
||||
Domain d = GenericDomain> {
|
||||
// AVX
|
||||
defm V#NAME : sse12_move_rr<RC, OpNode, vt, x86memop, OpcodeStr,
|
||||
"\t{$src2, $src1, $dst|$dst, $src1, $src2}">,
|
||||
"\t{$src2, $src1, $dst|$dst, $src1, $src2}", d>,
|
||||
VEX_4V, VEX_LIG;
|
||||
|
||||
def V#NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
|
||||
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
|
||||
[(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>,
|
||||
[(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR, d>,
|
||||
VEX, VEX_LIG, Sched<[WriteStore]>;
|
||||
// SSE1 & 2
|
||||
let Constraints = "$src1 = $dst" in {
|
||||
defm NAME : sse12_move_rr<RC, OpNode, vt, x86memop, OpcodeStr,
|
||||
"\t{$src2, $dst|$dst, $src2}">;
|
||||
"\t{$src2, $dst|$dst, $src2}", d>;
|
||||
}
|
||||
|
||||
def NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
|
||||
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
|
||||
[(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>,
|
||||
[(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR, d>,
|
||||
Sched<[WriteStore]>;
|
||||
}
|
||||
|
||||
// Loading from memory automatically zeroing upper bits.
|
||||
multiclass sse12_move_rm<RegisterClass RC, X86MemOperand x86memop,
|
||||
PatFrag mem_pat, string OpcodeStr> {
|
||||
PatFrag mem_pat, string OpcodeStr,
|
||||
Domain d = GenericDomain> {
|
||||
def V#NAME#rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
|
||||
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
|
||||
[(set RC:$dst, (mem_pat addr:$src))],
|
||||
IIC_SSE_MOV_S_RM>, VEX, VEX_LIG, Sched<[WriteLoad]>;
|
||||
IIC_SSE_MOV_S_RM, d>, VEX, VEX_LIG, Sched<[WriteLoad]>;
|
||||
def NAME#rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
|
||||
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
|
||||
[(set RC:$dst, (mem_pat addr:$src))],
|
||||
IIC_SSE_MOV_S_RM>, Sched<[WriteLoad]>;
|
||||
IIC_SSE_MOV_S_RM, d>, Sched<[WriteLoad]>;
|
||||
}
|
||||
|
||||
defm MOVSS : sse12_move<FR32, X86Movss, v4f32, f32mem, "movss">, XS;
|
||||
defm MOVSD : sse12_move<FR64, X86Movsd, v2f64, f64mem, "movsd">, XD;
|
||||
defm MOVSS : sse12_move<FR32, X86Movss, v4f32, f32mem, "movss",
|
||||
SSEPackedSingle>, XS;
|
||||
defm MOVSD : sse12_move<FR64, X86Movsd, v2f64, f64mem, "movsd",
|
||||
SSEPackedDouble>, XD;
|
||||
|
||||
let canFoldAsLoad = 1, isReMaterializable = 1 in {
|
||||
defm MOVSS : sse12_move_rm<FR32, f32mem, loadf32, "movss">, XS;
|
||||
defm MOVSS : sse12_move_rm<FR32, f32mem, loadf32, "movss",
|
||||
SSEPackedSingle>, XS;
|
||||
|
||||
let AddedComplexity = 20 in
|
||||
defm MOVSD : sse12_move_rm<FR64, f64mem, loadf64, "movsd">, XD;
|
||||
defm MOVSD : sse12_move_rm<FR64, f64mem, loadf64, "movsd",
|
||||
SSEPackedDouble>, XD;
|
||||
}
|
||||
|
||||
// Patterns
|
||||
|
@ -179,7 +179,7 @@ define void @test12() nounwind {
|
||||
; CHECK-LABEL: test12:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: movapd 0, %xmm0
|
||||
; CHECK-NEXT: movaps {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
||||
; CHECK-NEXT: movapd {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
||||
; CHECK-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
|
||||
; CHECK-NEXT: xorpd %xmm2, %xmm2
|
||||
; CHECK-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm2[1]
|
||||
|
@ -7,7 +7,7 @@ define float @test1(i32 %x) nounwind readnone {
|
||||
; CHECK-NEXT: pushl %eax
|
||||
; CHECK-NEXT: movsd .LCPI0_0, %xmm0
|
||||
; CHECK-NEXT: movd {{[0-9]+}}(%esp), %xmm1
|
||||
; CHECK-NEXT: orps %xmm0, %xmm1
|
||||
; CHECK-NEXT: orpd %xmm0, %xmm1
|
||||
; CHECK-NEXT: subsd %xmm0, %xmm1
|
||||
; CHECK-NEXT: xorps %xmm0, %xmm0
|
||||
; CHECK-NEXT: cvtsd2ss %xmm1, %xmm0
|
||||
|
@ -168,13 +168,13 @@ define <2 x double> @vsel_double(<2 x double> %v1, <2 x double> %v2) {
|
||||
; SSE2-LABEL: vsel_double:
|
||||
; SSE2: # BB#0: # %entry
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
|
||||
; SSE2-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE2-NEXT: movapd %xmm1, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: vsel_double:
|
||||
; SSSE3: # BB#0: # %entry
|
||||
; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
|
||||
; SSSE3-NEXT: movaps %xmm1, %xmm0
|
||||
; SSSE3-NEXT: movapd %xmm1, %xmm0
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: vsel_double:
|
||||
@ -195,13 +195,13 @@ define <2 x i64> @vsel_i64(<2 x i64> %v1, <2 x i64> %v2) {
|
||||
; SSE2-LABEL: vsel_i64:
|
||||
; SSE2: # BB#0: # %entry
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
|
||||
; SSE2-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE2-NEXT: movapd %xmm1, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: vsel_i64:
|
||||
; SSSE3: # BB#0: # %entry
|
||||
; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
|
||||
; SSSE3-NEXT: movaps %xmm1, %xmm0
|
||||
; SSSE3-NEXT: movapd %xmm1, %xmm0
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: vsel_i64:
|
||||
@ -362,9 +362,9 @@ define <8 x double> @vsel_double8(<8 x double> %v1, <8 x double> %v2) {
|
||||
; SSE2: # BB#0: # %entry
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm4 = xmm0[0],xmm4[1]
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm6 = xmm2[0],xmm6[1]
|
||||
; SSE2-NEXT: movaps %xmm4, %xmm0
|
||||
; SSE2-NEXT: movapd %xmm4, %xmm0
|
||||
; SSE2-NEXT: movaps %xmm5, %xmm1
|
||||
; SSE2-NEXT: movaps %xmm6, %xmm2
|
||||
; SSE2-NEXT: movapd %xmm6, %xmm2
|
||||
; SSE2-NEXT: movaps %xmm7, %xmm3
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
@ -372,9 +372,9 @@ define <8 x double> @vsel_double8(<8 x double> %v1, <8 x double> %v2) {
|
||||
; SSSE3: # BB#0: # %entry
|
||||
; SSSE3-NEXT: movsd {{.*#+}} xmm4 = xmm0[0],xmm4[1]
|
||||
; SSSE3-NEXT: movsd {{.*#+}} xmm6 = xmm2[0],xmm6[1]
|
||||
; SSSE3-NEXT: movaps %xmm4, %xmm0
|
||||
; SSSE3-NEXT: movapd %xmm4, %xmm0
|
||||
; SSSE3-NEXT: movaps %xmm5, %xmm1
|
||||
; SSSE3-NEXT: movaps %xmm6, %xmm2
|
||||
; SSSE3-NEXT: movapd %xmm6, %xmm2
|
||||
; SSSE3-NEXT: movaps %xmm7, %xmm3
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
@ -401,9 +401,9 @@ define <8 x i64> @vsel_i648(<8 x i64> %v1, <8 x i64> %v2) {
|
||||
; SSE2: # BB#0: # %entry
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm4 = xmm0[0],xmm4[1]
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm6 = xmm2[0],xmm6[1]
|
||||
; SSE2-NEXT: movaps %xmm4, %xmm0
|
||||
; SSE2-NEXT: movapd %xmm4, %xmm0
|
||||
; SSE2-NEXT: movaps %xmm5, %xmm1
|
||||
; SSE2-NEXT: movaps %xmm6, %xmm2
|
||||
; SSE2-NEXT: movapd %xmm6, %xmm2
|
||||
; SSE2-NEXT: movaps %xmm7, %xmm3
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
@ -411,9 +411,9 @@ define <8 x i64> @vsel_i648(<8 x i64> %v1, <8 x i64> %v2) {
|
||||
; SSSE3: # BB#0: # %entry
|
||||
; SSSE3-NEXT: movsd {{.*#+}} xmm4 = xmm0[0],xmm4[1]
|
||||
; SSSE3-NEXT: movsd {{.*#+}} xmm6 = xmm2[0],xmm6[1]
|
||||
; SSSE3-NEXT: movaps %xmm4, %xmm0
|
||||
; SSSE3-NEXT: movapd %xmm4, %xmm0
|
||||
; SSSE3-NEXT: movaps %xmm5, %xmm1
|
||||
; SSSE3-NEXT: movaps %xmm6, %xmm2
|
||||
; SSSE3-NEXT: movapd %xmm6, %xmm2
|
||||
; SSSE3-NEXT: movaps %xmm7, %xmm3
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
@ -446,16 +446,16 @@ define <4 x double> @vsel_double4(<4 x double> %v1, <4 x double> %v2) {
|
||||
; SSE2: # BB#0: # %entry
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm3 = xmm1[0],xmm3[1]
|
||||
; SSE2-NEXT: movaps %xmm2, %xmm0
|
||||
; SSE2-NEXT: movaps %xmm3, %xmm1
|
||||
; SSE2-NEXT: movapd %xmm2, %xmm0
|
||||
; SSE2-NEXT: movapd %xmm3, %xmm1
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: vsel_double4:
|
||||
; SSSE3: # BB#0: # %entry
|
||||
; SSSE3-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
|
||||
; SSSE3-NEXT: movsd {{.*#+}} xmm3 = xmm1[0],xmm3[1]
|
||||
; SSSE3-NEXT: movaps %xmm2, %xmm0
|
||||
; SSSE3-NEXT: movaps %xmm3, %xmm1
|
||||
; SSSE3-NEXT: movapd %xmm2, %xmm0
|
||||
; SSSE3-NEXT: movapd %xmm3, %xmm1
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: vsel_double4:
|
||||
@ -558,14 +558,14 @@ define <4 x double> @constant_blendvpd_avx(<4 x double> %xy, <4 x double> %ab) {
|
||||
; SSE2: # BB#0: # %entry
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm3 = xmm1[0],xmm3[1]
|
||||
; SSE2-NEXT: movaps %xmm2, %xmm0
|
||||
; SSE2-NEXT: movaps %xmm3, %xmm1
|
||||
; SSE2-NEXT: movapd %xmm3, %xmm1
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: constant_blendvpd_avx:
|
||||
; SSSE3: # BB#0: # %entry
|
||||
; SSSE3-NEXT: movsd {{.*#+}} xmm3 = xmm1[0],xmm3[1]
|
||||
; SSSE3-NEXT: movaps %xmm2, %xmm0
|
||||
; SSSE3-NEXT: movaps %xmm3, %xmm1
|
||||
; SSSE3-NEXT: movapd %xmm3, %xmm1
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: constant_blendvpd_avx:
|
||||
@ -744,13 +744,13 @@ define <4 x double> @blend_shufflevector_4xdouble(<4 x double> %a, <4 x double>
|
||||
; SSE2-LABEL: blend_shufflevector_4xdouble:
|
||||
; SSE2: # BB#0: # %entry
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
|
||||
; SSE2-NEXT: movaps %xmm2, %xmm0
|
||||
; SSE2-NEXT: movapd %xmm2, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: blend_shufflevector_4xdouble:
|
||||
; SSSE3: # BB#0: # %entry
|
||||
; SSSE3-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
|
||||
; SSSE3-NEXT: movaps %xmm2, %xmm0
|
||||
; SSSE3-NEXT: movapd %xmm2, %xmm0
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: blend_shufflevector_4xdouble:
|
||||
|
@ -212,19 +212,19 @@ define <2 x double> @shuffle_v2f64_03(<2 x double> %a, <2 x double> %b) {
|
||||
; SSE2-LABEL: shuffle_v2f64_03:
|
||||
; SSE2: # BB#0:
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
|
||||
; SSE2-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE2-NEXT: movapd %xmm1, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE3-LABEL: shuffle_v2f64_03:
|
||||
; SSE3: # BB#0:
|
||||
; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
|
||||
; SSE3-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE3-NEXT: movapd %xmm1, %xmm0
|
||||
; SSE3-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: shuffle_v2f64_03:
|
||||
; SSSE3: # BB#0:
|
||||
; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
|
||||
; SSSE3-NEXT: movaps %xmm1, %xmm0
|
||||
; SSSE3-NEXT: movapd %xmm1, %xmm0
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: shuffle_v2f64_03:
|
||||
@ -300,19 +300,19 @@ define <2 x i64> @shuffle_v2i64_03(<2 x i64> %a, <2 x i64> %b) {
|
||||
; SSE2-LABEL: shuffle_v2i64_03:
|
||||
; SSE2: # BB#0:
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
|
||||
; SSE2-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE2-NEXT: movapd %xmm1, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE3-LABEL: shuffle_v2i64_03:
|
||||
; SSE3: # BB#0:
|
||||
; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
|
||||
; SSE3-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE3-NEXT: movapd %xmm1, %xmm0
|
||||
; SSE3-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: shuffle_v2i64_03:
|
||||
; SSSE3: # BB#0:
|
||||
; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
|
||||
; SSSE3-NEXT: movaps %xmm1, %xmm0
|
||||
; SSSE3-NEXT: movapd %xmm1, %xmm0
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: shuffle_v2i64_03:
|
||||
@ -336,19 +336,19 @@ define <2 x i64> @shuffle_v2i64_03_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64
|
||||
; SSE2-LABEL: shuffle_v2i64_03_copy:
|
||||
; SSE2: # BB#0:
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
|
||||
; SSE2-NEXT: movaps %xmm2, %xmm0
|
||||
; SSE2-NEXT: movapd %xmm2, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE3-LABEL: shuffle_v2i64_03_copy:
|
||||
; SSE3: # BB#0:
|
||||
; SSE3-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
|
||||
; SSE3-NEXT: movaps %xmm2, %xmm0
|
||||
; SSE3-NEXT: movapd %xmm2, %xmm0
|
||||
; SSE3-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: shuffle_v2i64_03_copy:
|
||||
; SSSE3: # BB#0:
|
||||
; SSSE3-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
|
||||
; SSSE3-NEXT: movaps %xmm2, %xmm0
|
||||
; SSSE3-NEXT: movapd %xmm2, %xmm0
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: shuffle_v2i64_03_copy:
|
||||
@ -523,19 +523,19 @@ define <2 x i64> @shuffle_v2i64_21_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64
|
||||
; SSE2-LABEL: shuffle_v2i64_21_copy:
|
||||
; SSE2: # BB#0:
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
|
||||
; SSE2-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE2-NEXT: movapd %xmm1, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE3-LABEL: shuffle_v2i64_21_copy:
|
||||
; SSE3: # BB#0:
|
||||
; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
|
||||
; SSE3-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE3-NEXT: movapd %xmm1, %xmm0
|
||||
; SSE3-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: shuffle_v2i64_21_copy:
|
||||
; SSSE3: # BB#0:
|
||||
; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
|
||||
; SSSE3-NEXT: movaps %xmm1, %xmm0
|
||||
; SSSE3-NEXT: movapd %xmm1, %xmm0
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: shuffle_v2i64_21_copy:
|
||||
@ -692,19 +692,19 @@ define <2 x i64> @shuffle_v2i64_z0(<2 x i64> %a) {
|
||||
define <2 x i64> @shuffle_v2i64_z1(<2 x i64> %a) {
|
||||
; SSE2-LABEL: shuffle_v2i64_z1:
|
||||
; SSE2: # BB#0:
|
||||
; SSE2-NEXT: xorps %xmm1, %xmm1
|
||||
; SSE2-NEXT: xorpd %xmm1, %xmm1
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE3-LABEL: shuffle_v2i64_z1:
|
||||
; SSE3: # BB#0:
|
||||
; SSE3-NEXT: xorps %xmm1, %xmm1
|
||||
; SSE3-NEXT: xorpd %xmm1, %xmm1
|
||||
; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
|
||||
; SSE3-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: shuffle_v2i64_z1:
|
||||
; SSSE3: # BB#0:
|
||||
; SSSE3-NEXT: xorps %xmm1, %xmm1
|
||||
; SSSE3-NEXT: xorpd %xmm1, %xmm1
|
||||
; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
@ -779,19 +779,19 @@ define <2 x double> @shuffle_v2f64_z0(<2 x double> %a) {
|
||||
define <2 x double> @shuffle_v2f64_z1(<2 x double> %a) {
|
||||
; SSE2-LABEL: shuffle_v2f64_z1:
|
||||
; SSE2: # BB#0:
|
||||
; SSE2-NEXT: xorps %xmm1, %xmm1
|
||||
; SSE2-NEXT: xorpd %xmm1, %xmm1
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE3-LABEL: shuffle_v2f64_z1:
|
||||
; SSE3: # BB#0:
|
||||
; SSE3-NEXT: xorps %xmm1, %xmm1
|
||||
; SSE3-NEXT: xorpd %xmm1, %xmm1
|
||||
; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
|
||||
; SSE3-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: shuffle_v2f64_z1:
|
||||
; SSSE3: # BB#0:
|
||||
; SSSE3-NEXT: xorps %xmm1, %xmm1
|
||||
; SSSE3-NEXT: xorpd %xmm1, %xmm1
|
||||
; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
@ -991,7 +991,7 @@ define <2 x double> @insert_reg_lo_v2f64(double %a, <2 x double> %b) {
|
||||
; SSE-LABEL: insert_reg_lo_v2f64:
|
||||
; SSE: # BB#0:
|
||||
; SSE-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
|
||||
; SSE-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE-NEXT: movapd %xmm1, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: insert_reg_lo_v2f64:
|
||||
|
@ -1467,7 +1467,7 @@ define <4 x float> @insert_reg_lo_v4f32(double %a, <4 x float> %b) {
|
||||
; SSE-LABEL: insert_reg_lo_v4f32:
|
||||
; SSE: # BB#0:
|
||||
; SSE-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
|
||||
; SSE-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE-NEXT: movapd %xmm1, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: insert_reg_lo_v4f32:
|
||||
|
@ -801,7 +801,7 @@ define <4 x i64> @insert_mem_and_zero_v4i64(i64* %ptr) {
|
||||
define <4 x double> @insert_reg_and_zero_v4f64(double %a) {
|
||||
; ALL-LABEL: insert_reg_and_zero_v4f64:
|
||||
; ALL: # BB#0:
|
||||
; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
||||
; ALL-NEXT: vxorpd %xmm1, %xmm1, %xmm1
|
||||
; ALL-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
|
||||
; ALL-NEXT: retq
|
||||
%v = insertelement <4 x double> undef, double %a, i32 0
|
||||
|
@ -2124,13 +2124,13 @@ define <4 x float> @combine_undef_input_test5(<4 x float> %a, <4 x float> %b) {
|
||||
; SSE2-LABEL: combine_undef_input_test5:
|
||||
; SSE2: # BB#0:
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
|
||||
; SSE2-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE2-NEXT: movapd %xmm1, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: combine_undef_input_test5:
|
||||
; SSSE3: # BB#0:
|
||||
; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
|
||||
; SSSE3-NEXT: movaps %xmm1, %xmm0
|
||||
; SSSE3-NEXT: movapd %xmm1, %xmm0
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: combine_undef_input_test5:
|
||||
@ -2309,13 +2309,13 @@ define <4 x float> @combine_undef_input_test15(<4 x float> %a, <4 x float> %b) {
|
||||
; SSE2-LABEL: combine_undef_input_test15:
|
||||
; SSE2: # BB#0:
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
|
||||
; SSE2-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE2-NEXT: movapd %xmm1, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: combine_undef_input_test15:
|
||||
; SSSE3: # BB#0:
|
||||
; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
|
||||
; SSSE3-NEXT: movaps %xmm1, %xmm0
|
||||
; SSSE3-NEXT: movapd %xmm1, %xmm0
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: combine_undef_input_test15:
|
||||
|
@ -5,7 +5,7 @@ define <4 x i32> @test1(<4 x i32> %A, <4 x i32> %B) {
|
||||
; SSE2-LABEL: test1:
|
||||
; SSE2: # BB#0:
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
|
||||
; SSE2-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE2-NEXT: movapd %xmm1, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: test1:
|
||||
@ -34,7 +34,7 @@ define <4 x float> @test3(<4 x float> %A, <4 x float> %B) {
|
||||
; SSE2-LABEL: test3:
|
||||
; SSE2: # BB#0:
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
|
||||
; SSE2-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE2-NEXT: movapd %xmm1, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: test3:
|
||||
|
@ -18,7 +18,7 @@ define <4 x float> @test2(<4 x float> %a, <4 x float> %b) {
|
||||
; CHECK-LABEL: test2:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
|
||||
; CHECK-NEXT: movaps %xmm1, %xmm0
|
||||
; CHECK-NEXT: movapd %xmm1, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%1 = select <4 x i1> <i1 true, i1 true, i1 false, i1 false>, <4 x float> %a, <4 x float> %b
|
||||
ret <4 x float> %1
|
||||
@ -226,7 +226,7 @@ define <2 x double> @test24(<2 x double> %a, <2 x double> %b) {
|
||||
; CHECK-LABEL: test24:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
|
||||
; CHECK-NEXT: movaps %xmm1, %xmm0
|
||||
; CHECK-NEXT: movapd %xmm1, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%1 = select <2 x i1> <i1 true, i1 false>, <2 x double> %a, <2 x double> %b
|
||||
ret <2 x double> %1
|
||||
@ -236,7 +236,7 @@ define <2 x i64> @test25(<2 x i64> %a, <2 x i64> %b) {
|
||||
; CHECK-LABEL: test25:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
|
||||
; CHECK-NEXT: movaps %xmm1, %xmm0
|
||||
; CHECK-NEXT: movapd %xmm1, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%1 = select <2 x i1> <i1 true, i1 false>, <2 x i64> %a, <2 x i64> %b
|
||||
ret <2 x i64> %1
|
||||
|
Loading…
Reference in New Issue
Block a user