1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-25 04:02:41 +01:00

[x86] Give movss and movsd execution domains in the x86 backend.

This associates movss and movsd with the packed single and packed double
execution domains (resp.). While this is largely cosmetic, as we now
don't have weird ping-pong-ing between single and double precision, it
is also useful because it keeps the domain fixing algorithm from seeing
domain breaks that don't actually exist. It will also be much more
important if we have an execution domain default other than packed
single, as that would cause us to mix movss and movsd with integer
vector code on a regular basis, a very bad mixture.

llvm-svn: 228135
This commit is contained in:
Chandler Carruth 2015-02-04 10:58:53 +00:00
parent 7381b41ffe
commit 99f7e3a3dd
11 changed files with 75 additions and 68 deletions

View File

@ -422,8 +422,9 @@ class Iseg32 <bits<8> o, Format f, dag outs, dag ins, string asm,
// SI - SSE 1 & 2 scalar instructions
class SI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin> {
list<dag> pattern, InstrItinClass itin = NoItinerary,
Domain d = GenericDomain>
: I<o, F, outs, ins, asm, pattern, itin, d> {
let Predicates = !if(!eq(OpEnc.Value, EncEVEX.Value), [HasAVX512],
!if(!eq(OpEnc.Value, EncVEX.Value), [UseAVX],
!if(!eq(OpPrefix.Value, XS.Value), [UseSSE1],

View File

@ -548,13 +548,13 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
multiclass sse12_move_rr<RegisterClass RC, SDNode OpNode, ValueType vt,
X86MemOperand x86memop, string base_opc,
string asm_opr> {
string asm_opr, Domain d = GenericDomain> {
def rr : SI<0x10, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, RC:$src2),
!strconcat(base_opc, asm_opr),
[(set VR128:$dst, (vt (OpNode VR128:$src1,
(scalar_to_vector RC:$src2))))],
IIC_SSE_MOV_S_RR>, Sched<[WriteFShuffle]>;
IIC_SSE_MOV_S_RR, d>, Sched<[WriteFShuffle]>;
// For the disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
@ -565,49 +565,55 @@ multiclass sse12_move_rr<RegisterClass RC, SDNode OpNode, ValueType vt,
}
multiclass sse12_move<RegisterClass RC, SDNode OpNode, ValueType vt,
X86MemOperand x86memop, string OpcodeStr> {
X86MemOperand x86memop, string OpcodeStr,
Domain d = GenericDomain> {
// AVX
defm V#NAME : sse12_move_rr<RC, OpNode, vt, x86memop, OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}">,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}", d>,
VEX_4V, VEX_LIG;
def V#NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>,
[(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR, d>,
VEX, VEX_LIG, Sched<[WriteStore]>;
// SSE1 & 2
let Constraints = "$src1 = $dst" in {
defm NAME : sse12_move_rr<RC, OpNode, vt, x86memop, OpcodeStr,
"\t{$src2, $dst|$dst, $src2}">;
"\t{$src2, $dst|$dst, $src2}", d>;
}
def NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>,
[(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR, d>,
Sched<[WriteStore]>;
}
// Loading from memory automatically zeroing upper bits.
multiclass sse12_move_rm<RegisterClass RC, X86MemOperand x86memop,
PatFrag mem_pat, string OpcodeStr> {
PatFrag mem_pat, string OpcodeStr,
Domain d = GenericDomain> {
def V#NAME#rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set RC:$dst, (mem_pat addr:$src))],
IIC_SSE_MOV_S_RM>, VEX, VEX_LIG, Sched<[WriteLoad]>;
IIC_SSE_MOV_S_RM, d>, VEX, VEX_LIG, Sched<[WriteLoad]>;
def NAME#rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set RC:$dst, (mem_pat addr:$src))],
IIC_SSE_MOV_S_RM>, Sched<[WriteLoad]>;
IIC_SSE_MOV_S_RM, d>, Sched<[WriteLoad]>;
}
defm MOVSS : sse12_move<FR32, X86Movss, v4f32, f32mem, "movss">, XS;
defm MOVSD : sse12_move<FR64, X86Movsd, v2f64, f64mem, "movsd">, XD;
defm MOVSS : sse12_move<FR32, X86Movss, v4f32, f32mem, "movss",
SSEPackedSingle>, XS;
defm MOVSD : sse12_move<FR64, X86Movsd, v2f64, f64mem, "movsd",
SSEPackedDouble>, XD;
let canFoldAsLoad = 1, isReMaterializable = 1 in {
defm MOVSS : sse12_move_rm<FR32, f32mem, loadf32, "movss">, XS;
defm MOVSS : sse12_move_rm<FR32, f32mem, loadf32, "movss",
SSEPackedSingle>, XS;
let AddedComplexity = 20 in
defm MOVSD : sse12_move_rm<FR64, f64mem, loadf64, "movsd">, XD;
defm MOVSD : sse12_move_rm<FR64, f64mem, loadf64, "movsd",
SSEPackedDouble>, XD;
}
// Patterns

View File

@ -179,7 +179,7 @@ define void @test12() nounwind {
; CHECK-LABEL: test12:
; CHECK: ## BB#0:
; CHECK-NEXT: movapd 0, %xmm0
; CHECK-NEXT: movaps {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
; CHECK-NEXT: movapd {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
; CHECK-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; CHECK-NEXT: xorpd %xmm2, %xmm2
; CHECK-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm2[1]

View File

@ -7,7 +7,7 @@ define float @test1(i32 %x) nounwind readnone {
; CHECK-NEXT: pushl %eax
; CHECK-NEXT: movsd .LCPI0_0, %xmm0
; CHECK-NEXT: movd {{[0-9]+}}(%esp), %xmm1
; CHECK-NEXT: orps %xmm0, %xmm1
; CHECK-NEXT: orpd %xmm0, %xmm1
; CHECK-NEXT: subsd %xmm0, %xmm1
; CHECK-NEXT: xorps %xmm0, %xmm0
; CHECK-NEXT: cvtsd2ss %xmm1, %xmm0

View File

@ -168,13 +168,13 @@ define <2 x double> @vsel_double(<2 x double> %v1, <2 x double> %v2) {
; SSE2-LABEL: vsel_double:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: vsel_double:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSSE3-NEXT: movaps %xmm1, %xmm0
; SSSE3-NEXT: movapd %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: vsel_double:
@ -195,13 +195,13 @@ define <2 x i64> @vsel_i64(<2 x i64> %v1, <2 x i64> %v2) {
; SSE2-LABEL: vsel_i64:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: vsel_i64:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSSE3-NEXT: movaps %xmm1, %xmm0
; SSSE3-NEXT: movapd %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: vsel_i64:
@ -362,9 +362,9 @@ define <8 x double> @vsel_double8(<8 x double> %v1, <8 x double> %v2) {
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movsd {{.*#+}} xmm4 = xmm0[0],xmm4[1]
; SSE2-NEXT: movsd {{.*#+}} xmm6 = xmm2[0],xmm6[1]
; SSE2-NEXT: movaps %xmm4, %xmm0
; SSE2-NEXT: movapd %xmm4, %xmm0
; SSE2-NEXT: movaps %xmm5, %xmm1
; SSE2-NEXT: movaps %xmm6, %xmm2
; SSE2-NEXT: movapd %xmm6, %xmm2
; SSE2-NEXT: movaps %xmm7, %xmm3
; SSE2-NEXT: retq
;
@ -372,9 +372,9 @@ define <8 x double> @vsel_double8(<8 x double> %v1, <8 x double> %v2) {
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movsd {{.*#+}} xmm4 = xmm0[0],xmm4[1]
; SSSE3-NEXT: movsd {{.*#+}} xmm6 = xmm2[0],xmm6[1]
; SSSE3-NEXT: movaps %xmm4, %xmm0
; SSSE3-NEXT: movapd %xmm4, %xmm0
; SSSE3-NEXT: movaps %xmm5, %xmm1
; SSSE3-NEXT: movaps %xmm6, %xmm2
; SSSE3-NEXT: movapd %xmm6, %xmm2
; SSSE3-NEXT: movaps %xmm7, %xmm3
; SSSE3-NEXT: retq
;
@ -401,9 +401,9 @@ define <8 x i64> @vsel_i648(<8 x i64> %v1, <8 x i64> %v2) {
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movsd {{.*#+}} xmm4 = xmm0[0],xmm4[1]
; SSE2-NEXT: movsd {{.*#+}} xmm6 = xmm2[0],xmm6[1]
; SSE2-NEXT: movaps %xmm4, %xmm0
; SSE2-NEXT: movapd %xmm4, %xmm0
; SSE2-NEXT: movaps %xmm5, %xmm1
; SSE2-NEXT: movaps %xmm6, %xmm2
; SSE2-NEXT: movapd %xmm6, %xmm2
; SSE2-NEXT: movaps %xmm7, %xmm3
; SSE2-NEXT: retq
;
@ -411,9 +411,9 @@ define <8 x i64> @vsel_i648(<8 x i64> %v1, <8 x i64> %v2) {
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movsd {{.*#+}} xmm4 = xmm0[0],xmm4[1]
; SSSE3-NEXT: movsd {{.*#+}} xmm6 = xmm2[0],xmm6[1]
; SSSE3-NEXT: movaps %xmm4, %xmm0
; SSSE3-NEXT: movapd %xmm4, %xmm0
; SSSE3-NEXT: movaps %xmm5, %xmm1
; SSSE3-NEXT: movaps %xmm6, %xmm2
; SSSE3-NEXT: movapd %xmm6, %xmm2
; SSSE3-NEXT: movaps %xmm7, %xmm3
; SSSE3-NEXT: retq
;
@ -446,16 +446,16 @@ define <4 x double> @vsel_double4(<4 x double> %v1, <4 x double> %v2) {
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
; SSE2-NEXT: movsd {{.*#+}} xmm3 = xmm1[0],xmm3[1]
; SSE2-NEXT: movaps %xmm2, %xmm0
; SSE2-NEXT: movaps %xmm3, %xmm1
; SSE2-NEXT: movapd %xmm2, %xmm0
; SSE2-NEXT: movapd %xmm3, %xmm1
; SSE2-NEXT: retq
;
; SSSE3-LABEL: vsel_double4:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
; SSSE3-NEXT: movsd {{.*#+}} xmm3 = xmm1[0],xmm3[1]
; SSSE3-NEXT: movaps %xmm2, %xmm0
; SSSE3-NEXT: movaps %xmm3, %xmm1
; SSSE3-NEXT: movapd %xmm2, %xmm0
; SSSE3-NEXT: movapd %xmm3, %xmm1
; SSSE3-NEXT: retq
;
; SSE41-LABEL: vsel_double4:
@ -558,14 +558,14 @@ define <4 x double> @constant_blendvpd_avx(<4 x double> %xy, <4 x double> %ab) {
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movsd {{.*#+}} xmm3 = xmm1[0],xmm3[1]
; SSE2-NEXT: movaps %xmm2, %xmm0
; SSE2-NEXT: movaps %xmm3, %xmm1
; SSE2-NEXT: movapd %xmm3, %xmm1
; SSE2-NEXT: retq
;
; SSSE3-LABEL: constant_blendvpd_avx:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movsd {{.*#+}} xmm3 = xmm1[0],xmm3[1]
; SSSE3-NEXT: movaps %xmm2, %xmm0
; SSSE3-NEXT: movaps %xmm3, %xmm1
; SSSE3-NEXT: movapd %xmm3, %xmm1
; SSSE3-NEXT: retq
;
; SSE41-LABEL: constant_blendvpd_avx:
@ -744,13 +744,13 @@ define <4 x double> @blend_shufflevector_4xdouble(<4 x double> %a, <4 x double>
; SSE2-LABEL: blend_shufflevector_4xdouble:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
; SSE2-NEXT: movaps %xmm2, %xmm0
; SSE2-NEXT: movapd %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: blend_shufflevector_4xdouble:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
; SSSE3-NEXT: movaps %xmm2, %xmm0
; SSSE3-NEXT: movapd %xmm2, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: blend_shufflevector_4xdouble:

View File

@ -212,19 +212,19 @@ define <2 x double> @shuffle_v2f64_03(<2 x double> %a, <2 x double> %b) {
; SSE2-LABEL: shuffle_v2f64_03:
; SSE2: # BB#0:
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2f64_03:
; SSE3: # BB#0:
; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSE3-NEXT: movaps %xmm1, %xmm0
; SSE3-NEXT: movapd %xmm1, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2f64_03:
; SSSE3: # BB#0:
; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSSE3-NEXT: movaps %xmm1, %xmm0
; SSSE3-NEXT: movapd %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2f64_03:
@ -300,19 +300,19 @@ define <2 x i64> @shuffle_v2i64_03(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_03:
; SSE2: # BB#0:
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2i64_03:
; SSE3: # BB#0:
; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSE3-NEXT: movaps %xmm1, %xmm0
; SSE3-NEXT: movapd %xmm1, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2i64_03:
; SSSE3: # BB#0:
; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSSE3-NEXT: movaps %xmm1, %xmm0
; SSSE3-NEXT: movapd %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2i64_03:
@ -336,19 +336,19 @@ define <2 x i64> @shuffle_v2i64_03_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64
; SSE2-LABEL: shuffle_v2i64_03_copy:
; SSE2: # BB#0:
; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
; SSE2-NEXT: movaps %xmm2, %xmm0
; SSE2-NEXT: movapd %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2i64_03_copy:
; SSE3: # BB#0:
; SSE3-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
; SSE3-NEXT: movaps %xmm2, %xmm0
; SSE3-NEXT: movapd %xmm2, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2i64_03_copy:
; SSSE3: # BB#0:
; SSSE3-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
; SSSE3-NEXT: movaps %xmm2, %xmm0
; SSSE3-NEXT: movapd %xmm2, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2i64_03_copy:
@ -523,19 +523,19 @@ define <2 x i64> @shuffle_v2i64_21_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64
; SSE2-LABEL: shuffle_v2i64_21_copy:
; SSE2: # BB#0:
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2i64_21_copy:
; SSE3: # BB#0:
; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSE3-NEXT: movaps %xmm1, %xmm0
; SSE3-NEXT: movapd %xmm1, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2i64_21_copy:
; SSSE3: # BB#0:
; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSSE3-NEXT: movaps %xmm1, %xmm0
; SSSE3-NEXT: movapd %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2i64_21_copy:
@ -692,19 +692,19 @@ define <2 x i64> @shuffle_v2i64_z0(<2 x i64> %a) {
define <2 x i64> @shuffle_v2i64_z1(<2 x i64> %a) {
; SSE2-LABEL: shuffle_v2i64_z1:
; SSE2: # BB#0:
; SSE2-NEXT: xorps %xmm1, %xmm1
; SSE2-NEXT: xorpd %xmm1, %xmm1
; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2i64_z1:
; SSE3: # BB#0:
; SSE3-NEXT: xorps %xmm1, %xmm1
; SSE3-NEXT: xorpd %xmm1, %xmm1
; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2i64_z1:
; SSSE3: # BB#0:
; SSSE3-NEXT: xorps %xmm1, %xmm1
; SSSE3-NEXT: xorpd %xmm1, %xmm1
; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT: retq
;
@ -779,19 +779,19 @@ define <2 x double> @shuffle_v2f64_z0(<2 x double> %a) {
define <2 x double> @shuffle_v2f64_z1(<2 x double> %a) {
; SSE2-LABEL: shuffle_v2f64_z1:
; SSE2: # BB#0:
; SSE2-NEXT: xorps %xmm1, %xmm1
; SSE2-NEXT: xorpd %xmm1, %xmm1
; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2f64_z1:
; SSE3: # BB#0:
; SSE3-NEXT: xorps %xmm1, %xmm1
; SSE3-NEXT: xorpd %xmm1, %xmm1
; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2f64_z1:
; SSSE3: # BB#0:
; SSSE3-NEXT: xorps %xmm1, %xmm1
; SSSE3-NEXT: xorpd %xmm1, %xmm1
; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT: retq
;
@ -991,7 +991,7 @@ define <2 x double> @insert_reg_lo_v2f64(double %a, <2 x double> %b) {
; SSE-LABEL: insert_reg_lo_v2f64:
; SSE: # BB#0:
; SSE-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_reg_lo_v2f64:

View File

@ -1467,7 +1467,7 @@ define <4 x float> @insert_reg_lo_v4f32(double %a, <4 x float> %b) {
; SSE-LABEL: insert_reg_lo_v4f32:
; SSE: # BB#0:
; SSE-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_reg_lo_v4f32:

View File

@ -801,7 +801,7 @@ define <4 x i64> @insert_mem_and_zero_v4i64(i64* %ptr) {
define <4 x double> @insert_reg_and_zero_v4f64(double %a) {
; ALL-LABEL: insert_reg_and_zero_v4f64:
; ALL: # BB#0:
; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1
; ALL-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; ALL-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; ALL-NEXT: retq
%v = insertelement <4 x double> undef, double %a, i32 0

View File

@ -2124,13 +2124,13 @@ define <4 x float> @combine_undef_input_test5(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: combine_undef_input_test5:
; SSE2: # BB#0:
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_undef_input_test5:
; SSSE3: # BB#0:
; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSSE3-NEXT: movaps %xmm1, %xmm0
; SSSE3-NEXT: movapd %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_undef_input_test5:
@ -2309,13 +2309,13 @@ define <4 x float> @combine_undef_input_test15(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: combine_undef_input_test15:
; SSE2: # BB#0:
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_undef_input_test15:
; SSSE3: # BB#0:
; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSSE3-NEXT: movaps %xmm1, %xmm0
; SSSE3-NEXT: movapd %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_undef_input_test15:

View File

@ -5,7 +5,7 @@ define <4 x i32> @test1(<4 x i32> %A, <4 x i32> %B) {
; SSE2-LABEL: test1:
; SSE2: # BB#0:
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: test1:
@ -34,7 +34,7 @@ define <4 x float> @test3(<4 x float> %A, <4 x float> %B) {
; SSE2-LABEL: test3:
; SSE2: # BB#0:
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: test3:

View File

@ -18,7 +18,7 @@ define <4 x float> @test2(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: test2:
; CHECK: # BB#0:
; CHECK-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; CHECK-NEXT: movaps %xmm1, %xmm0
; CHECK-NEXT: movapd %xmm1, %xmm0
; CHECK-NEXT: retq
%1 = select <4 x i1> <i1 true, i1 true, i1 false, i1 false>, <4 x float> %a, <4 x float> %b
ret <4 x float> %1
@ -226,7 +226,7 @@ define <2 x double> @test24(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: test24:
; CHECK: # BB#0:
; CHECK-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; CHECK-NEXT: movaps %xmm1, %xmm0
; CHECK-NEXT: movapd %xmm1, %xmm0
; CHECK-NEXT: retq
%1 = select <2 x i1> <i1 true, i1 false>, <2 x double> %a, <2 x double> %b
ret <2 x double> %1
@ -236,7 +236,7 @@ define <2 x i64> @test25(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test25:
; CHECK: # BB#0:
; CHECK-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; CHECK-NEXT: movaps %xmm1, %xmm0
; CHECK-NEXT: movapd %xmm1, %xmm0
; CHECK-NEXT: retq
%1 = select <2 x i1> <i1 true, i1 false>, <2 x i64> %a, <2 x i64> %b
ret <2 x i64> %1