
[X86] Make movsd commutable to shufpd with a 0x02 immediate on pre-SSE4.1 targets.

This can help avoid a copy or enable load folding.

On SSE4.1 targets we can commute it to blendi instead.

I had to make shufpd with a 0x02 immediate commutable as well
since we expect commuting to be reversible.
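
The lane algebra this relies on can be double-checked with SSE intrinsics.
The following standalone sketch is illustrative only and is not part of the
commit:

// movsd  dst, src       -> { src[0], dst[1] }
// shufpd dst, src, 0x02 -> { dst[0], src[1] }  (bit 0 selects the dst lane,
//                                               bit 1 selects the src lane)
// So movsd(a, b) == shufpd(b, a, 0x02) with the operands commuted, and on
// SSE4.1 blendpd(b, a, 0x02) computes the same result.
#include <assert.h>
#include <immintrin.h>

int main(void) {
  __m128d a = _mm_set_pd(2.0, 1.0);          // a = { 1.0, 2.0 }
  __m128d b = _mm_set_pd(4.0, 3.0);          // b = { 3.0, 4.0 }
  __m128d mov = _mm_move_sd(a, b);           // { b[0], a[1] } = { 3.0, 2.0 }
  __m128d shuf = _mm_shuffle_pd(b, a, 0x02); // { b[0], a[1] } = { 3.0, 2.0 }
  assert(_mm_movemask_pd(_mm_cmpeq_pd(mov, shuf)) == 0x3);
  return 0;
}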

llvm-svn: 365292
Craig Topper 2019-07-08 06:52:43 +00:00
parent 8bb7483845
commit 238cd86fbe
18 changed files with 209 additions and 232 deletions


@@ -1542,20 +1542,39 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
   case X86::VMOVSDrr:
   case X86::VMOVSSrr:{
     // On SSE41 or later we can commute a MOVSS/MOVSD to a BLENDPS/BLENDPD.
-    assert(Subtarget.hasSSE41() && "Commuting MOVSD/MOVSS requires SSE41!");
+    if (Subtarget.hasSSE41()) {
+      unsigned Mask, Opc;
+      switch (MI.getOpcode()) {
+      default: llvm_unreachable("Unreachable!");
+      case X86::MOVSDrr:  Opc = X86::BLENDPDrri; Mask = 0x02; break;
+      case X86::MOVSSrr:  Opc = X86::BLENDPSrri; Mask = 0x0E; break;
+      case X86::VMOVSDrr: Opc = X86::VBLENDPDrri; Mask = 0x02; break;
+      case X86::VMOVSSrr: Opc = X86::VBLENDPSrri; Mask = 0x0E; break;
+      }
 
-    unsigned Mask, Opc;
-    switch (MI.getOpcode()) {
-    default: llvm_unreachable("Unreachable!");
-    case X86::MOVSDrr:  Opc = X86::BLENDPDrri; Mask = 0x02; break;
-    case X86::MOVSSrr:  Opc = X86::BLENDPSrri; Mask = 0x0E; break;
-    case X86::VMOVSDrr: Opc = X86::VBLENDPDrri; Mask = 0x02; break;
-    case X86::VMOVSSrr: Opc = X86::VBLENDPSrri; Mask = 0x0E; break;
+      auto &WorkingMI = cloneIfNew(MI);
+      WorkingMI.setDesc(get(Opc));
+      WorkingMI.addOperand(MachineOperand::CreateImm(Mask));
+      return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
+                                                     OpIdx1, OpIdx2);
     }
 
+    // Convert to SHUFPD.
+    assert(MI.getOpcode() == X86::MOVSDrr &&
+           "Can only commute MOVSDrr without SSE4.1");
+
     auto &WorkingMI = cloneIfNew(MI);
-    WorkingMI.setDesc(get(Opc));
-    WorkingMI.addOperand(MachineOperand::CreateImm(Mask));
+    WorkingMI.setDesc(get(X86::SHUFPDrri));
+    WorkingMI.addOperand(MachineOperand::CreateImm(0x02));
     return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
                                                    OpIdx1, OpIdx2);
   }
+  case X86::SHUFPDrri: {
+    // Commute to MOVSD.
+    assert(MI.getOperand(3).getImm() == 0x02 && "Unexpected immediate!");
+    auto &WorkingMI = cloneIfNew(MI);
+    WorkingMI.setDesc(get(X86::MOVSDrr));
+    WorkingMI.RemoveOperand(3);
+    return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
+                                                   OpIdx1, OpIdx2);
+  }
@@ -1874,13 +1893,18 @@ bool X86InstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1,
     }
     return false;
   }
-  case X86::MOVSDrr:
   case X86::MOVSSrr:
-  case X86::VMOVSDrr:
-  case X86::VMOVSSrr:
+    // X86::MOVSDrr is always commutable. MOVSS is only commutable if we can
+    // form sse4.1 blend. We assume VMOVSSrr/VMOVSDrr is always commutable since
+    // AVX implies sse4.1.
     if (Subtarget.hasSSE41())
       return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
     return false;
+  case X86::SHUFPDrri:
+    // We can commute this to MOVSD.
+    if (MI.getOperand(3).getImm() == 0x02)
+      return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
+    return false;
   case X86::MOVHLPSrr:
   case X86::UNPCKHPDrr:
   case X86::VMOVHLPSrr:
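
Why only the 0x02 immediate commutes to MOVSD: shufpd takes its low result
lane from the first source (bit 0 of the immediate) and its high lane from
the second source (bit 1), so after swapping the sources only imm == 0x02
reproduces what movsd computes. A small standalone model (a sketch, not
commit code):

#include <stdio.h>

// Lane model of "shufpd dst, src, imm" on <2 x double>:
//   result = { dst[imm & 1], src[(imm >> 1) & 1] }
// "movsd dst, src" computes { src[0], dst[1] }, so the commuted form
// shufpd(src, dst, imm) = { src[imm & 1], dst[(imm >> 1) & 1] } matches it
// only when (imm & 1) == 0 and ((imm >> 1) & 1) == 1, i.e. imm == 0x02.
int main(void) {
  for (int imm = 0; imm < 4; ++imm) {
    int lo = imm & 1, hi = (imm >> 1) & 1;
    printf("imm=0x%02X: commuted shufpd -> { src[%d], dst[%d] }%s\n",
           imm, lo, hi, (lo == 0 && hi == 1) ? "  == movsd dst, src" : "");
  }
  return 0;
}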


@@ -1951,12 +1951,14 @@ let Predicates = [UseSSE1] in {
 /// sse12_shuffle - sse 1 & 2 fp shuffle instructions
 multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop,
                          ValueType vt, string asm, PatFrag mem_frag,
-                         X86FoldableSchedWrite sched, Domain d> {
+                         X86FoldableSchedWrite sched, Domain d,
+                         bit IsCommutable = 0> {
   def rmi : PIi8<0xC6, MRMSrcMem, (outs RC:$dst),
                  (ins RC:$src1, x86memop:$src2, u8imm:$src3), asm,
                  [(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2),
                                     (i8 imm:$src3))))], d>,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
+  let isCommutable = IsCommutable in
   def rri : PIi8<0xC6, MRMSrcReg, (outs RC:$dst),
                  (ins RC:$src1, RC:$src2, u8imm:$src3), asm,
                  [(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2,
@@ -1988,7 +1990,7 @@ let Constraints = "$src1 = $dst" in {
                               memopv4f32, SchedWriteFShuffle.XMM, SSEPackedSingle>, PS;
   defm SHUFPD : sse12_shuffle<VR128, f128mem, v2f64,
                               "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
-                              memopv2f64, SchedWriteFShuffle.XMM, SSEPackedDouble>, PD;
+                              memopv2f64, SchedWriteFShuffle.XMM, SSEPackedDouble, 1>, PD;
 }
 
 //===----------------------------------------------------------------------===//


@@ -65,9 +65,7 @@
 define <2 x double> @test_negative_zero_2(<2 x double> %A) {
 ; SSE2-LABEL: test_negative_zero_2:
 ; SSE2: # %bb.0: # %entry
-; SSE2-NEXT: movapd {{.*#+}} xmm1 = <u,-0.0E+0>
-; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSE2-NEXT: movapd %xmm1, %xmm0
+; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],mem[1]
 ; SSE2-NEXT: retq
 ;
 ; SSE41-LABEL: test_negative_zero_2:


@@ -9,8 +9,7 @@
 define <2 x double> @insert_f64(double %a0, <2 x double> %a1) {
 ; SSE2-LABEL: insert_f64:
 ; SSE2: # %bb.0:
-; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSE2-NEXT: movapd %xmm1, %xmm0
+; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
 ; SSE2-NEXT: retq
 ;
 ; SSE41-LABEL: insert_f64:


@@ -1529,8 +1529,7 @@
 ; SSE2-NEXT: psrlq $2, %xmm1
 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSE2-NEXT: movapd %xmm1, %xmm0
+; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
 ; SSE2-NEXT: retq
 ;
 ; SSE41-LABEL: combine_vec_sdiv_by_pow2b_v2i64:
@@ -1616,24 +1615,23 @@ define <4 x i64> @combine_vec_sdiv_by_pow2b_v4i64(<4 x i64> %x) {
 ; SSE2-NEXT: psrlq $2, %xmm2
 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
 ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
-; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
-; SSE2-NEXT: movdqa %xmm1, %xmm0
-; SSE2-NEXT: psrad $31, %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
-; SSE2-NEXT: movdqa %xmm3, %xmm0
-; SSE2-NEXT: psrlq $61, %xmm0
-; SSE2-NEXT: psrlq $60, %xmm3
-; SSE2-NEXT: movsd {{.*#+}} xmm3 = xmm0[0],xmm3[1]
-; SSE2-NEXT: paddq %xmm1, %xmm3
-; SSE2-NEXT: movdqa %xmm3, %xmm0
-; SSE2-NEXT: psrlq $3, %xmm0
-; SSE2-NEXT: psrlq $4, %xmm3
-; SSE2-NEXT: movsd {{.*#+}} xmm3 = xmm0[0],xmm3[1]
-; SSE2-NEXT: movapd {{.*#+}} xmm0 = [1152921504606846976,576460752303423488]
-; SSE2-NEXT: xorpd %xmm0, %xmm3
-; SSE2-NEXT: psubq %xmm0, %xmm3
-; SSE2-NEXT: movapd %xmm2, %xmm0
-; SSE2-NEXT: movdqa %xmm3, %xmm1
+; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm2[1]
+; SSE2-NEXT: movdqa %xmm1, %xmm2
+; SSE2-NEXT: psrad $31, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; SSE2-NEXT: movdqa %xmm2, %xmm3
+; SSE2-NEXT: psrlq $61, %xmm3
+; SSE2-NEXT: psrlq $60, %xmm2
+; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm3[0],xmm2[1]
+; SSE2-NEXT: paddq %xmm1, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm1
+; SSE2-NEXT: psrlq $3, %xmm1
+; SSE2-NEXT: psrlq $4, %xmm2
+; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
+; SSE2-NEXT: movapd {{.*#+}} xmm1 = [1152921504606846976,576460752303423488]
+; SSE2-NEXT: xorpd %xmm1, %xmm2
+; SSE2-NEXT: psubq %xmm1, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm1
 ; SSE2-NEXT: retq
 ;
 ; SSE41-LABEL: combine_vec_sdiv_by_pow2b_v4i64:
@@ -1745,29 +1743,28 @@
 define <8 x i64> @combine_vec_sdiv_by_pow2b_v8i64(<8 x i64> %x) {
 ; SSE2-LABEL: combine_vec_sdiv_by_pow2b_v8i64:
 ; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa %xmm0, %xmm4
+; SSE2-NEXT: psrad $31, %xmm4
+; SSE2-NEXT: psrlq $62, %xmm4
+; SSE2-NEXT: paddq %xmm0, %xmm4
+; SSE2-NEXT: movdqa %xmm4, %xmm5
+; SSE2-NEXT: psrad $2, %xmm5
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,3,2,3]
+; SSE2-NEXT: psrlq $2, %xmm4
+; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1]
+; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm4[1]
 ; SSE2-NEXT: movdqa %xmm2, %xmm4
-; SSE2-NEXT: movdqa %xmm0, %xmm2
-; SSE2-NEXT: psrad $31, %xmm0
-; SSE2-NEXT: psrlq $62, %xmm0
-; SSE2-NEXT: paddq %xmm2, %xmm0
-; SSE2-NEXT: movdqa %xmm0, %xmm5
+; SSE2-NEXT: psrad $31, %xmm4
+; SSE2-NEXT: psrlq $62, %xmm4
+; SSE2-NEXT: paddq %xmm2, %xmm4
+; SSE2-NEXT: movdqa %xmm4, %xmm5
 ; SSE2-NEXT: psrad $2, %xmm5
 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,3,2,3]
-; SSE2-NEXT: psrlq $2, %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1]
-; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
-; SSE2-NEXT: movdqa %xmm4, %xmm2
-; SSE2-NEXT: psrad $31, %xmm2
-; SSE2-NEXT: psrlq $62, %xmm2
-; SSE2-NEXT: paddq %xmm4, %xmm2
-; SSE2-NEXT: movdqa %xmm2, %xmm5
-; SSE2-NEXT: psrad $2, %xmm5
-; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,3,2,3]
-; SSE2-NEXT: psrlq $2, %xmm2
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
-; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1]
-; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm4[0],xmm2[1]
+; SSE2-NEXT: psrlq $2, %xmm4
+; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1]
+; SSE2-NEXT: shufpd {{.*#+}} xmm2 = xmm2[0],xmm4[1]
 ; SSE2-NEXT: movdqa %xmm1, %xmm4
 ; SSE2-NEXT: psrad $31, %xmm4
 ; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]


@@ -657,46 +657,47 @@ define <16 x i8> @test14(<16 x i8> %x, <16 x i32> %y) nounwind {
 ;
 ; SSSE3-LABEL: test14:
 ; SSSE3: # %bb.0: # %vector.ph
-; SSSE3-NEXT: movdqa %xmm0, %xmm5
-; SSSE3-NEXT: pxor %xmm0, %xmm0
-; SSSE3-NEXT: movdqa %xmm5, %xmm7
-; SSSE3-NEXT: punpckhbw {{.*#+}} xmm7 = xmm7[8],xmm0[8],xmm7[9],xmm0[9],xmm7[10],xmm0[10],xmm7[11],xmm0[11],xmm7[12],xmm0[12],xmm7[13],xmm0[13],xmm7[14],xmm0[14],xmm7[15],xmm0[15]
-; SSSE3-NEXT: movdqa %xmm7, %xmm8
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm8 = xmm8[0],xmm0[0],xmm8[1],xmm0[1],xmm8[2],xmm0[2],xmm8[3],xmm0[3]
-; SSSE3-NEXT: punpckhwd {{.*#+}} xmm7 = xmm7[4],xmm0[4],xmm7[5],xmm0[5],xmm7[6],xmm0[6],xmm7[7],xmm0[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm0[0],xmm5[1],xmm0[1],xmm5[2],xmm0[2],xmm5[3],xmm0[3],xmm5[4],xmm0[4],xmm5[5],xmm0[5],xmm5[6],xmm0[6],xmm5[7],xmm0[7]
-; SSSE3-NEXT: movdqa %xmm5, %xmm10
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm10 = xmm10[0],xmm0[0],xmm10[1],xmm0[1],xmm10[2],xmm0[2],xmm10[3],xmm0[3]
-; SSSE3-NEXT: punpckhwd {{.*#+}} xmm5 = xmm5[4],xmm0[4],xmm5[5],xmm0[5],xmm5[6],xmm0[6],xmm5[7],xmm0[7]
-; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
+; SSSE3-NEXT: pxor %xmm7, %xmm7
+; SSSE3-NEXT: movdqa %xmm0, %xmm11
+; SSSE3-NEXT: punpckhbw {{.*#+}} xmm11 = xmm11[8],xmm7[8],xmm11[9],xmm7[9],xmm11[10],xmm7[10],xmm11[11],xmm7[11],xmm11[12],xmm7[12],xmm11[13],xmm7[13],xmm11[14],xmm7[14],xmm11[15],xmm7[15]
+; SSSE3-NEXT: movdqa %xmm11, %xmm8
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm8 = xmm8[0],xmm7[0],xmm8[1],xmm7[1],xmm8[2],xmm7[2],xmm8[3],xmm7[3]
+; SSSE3-NEXT: punpckhwd {{.*#+}} xmm11 = xmm11[4],xmm7[4],xmm11[5],xmm7[5],xmm11[6],xmm7[6],xmm11[7],xmm7[7]
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1],xmm0[2],xmm7[2],xmm0[3],xmm7[3],xmm0[4],xmm7[4],xmm0[5],xmm7[5],xmm0[6],xmm7[6],xmm0[7],xmm7[7]
+; SSSE3-NEXT: movdqa %xmm0, %xmm10
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm10 = xmm10[0],xmm7[0],xmm10[1],xmm7[1],xmm10[2],xmm7[2],xmm10[3],xmm7[3]
+; SSSE3-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm7[4],xmm0[5],xmm7[5],xmm0[6],xmm7[6],xmm0[7],xmm7[7]
+; SSSE3-NEXT: movdqa {{.*#+}} xmm7 = [2147483648,2147483648,2147483648,2147483648]
 ; SSSE3-NEXT: movdqa %xmm2, %xmm9
-; SSSE3-NEXT: pxor %xmm0, %xmm9
-; SSSE3-NEXT: psubd %xmm5, %xmm2
-; SSSE3-NEXT: por %xmm0, %xmm5
+; SSSE3-NEXT: pxor %xmm7, %xmm9
+; SSSE3-NEXT: psubd %xmm0, %xmm2
+; SSSE3-NEXT: movdqa %xmm0, %xmm5
+; SSSE3-NEXT: por %xmm7, %xmm5
 ; SSSE3-NEXT: pcmpgtd %xmm9, %xmm5
 ; SSSE3-NEXT: movdqa {{.*#+}} xmm9 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
 ; SSSE3-NEXT: pshufb %xmm9, %xmm5
 ; SSSE3-NEXT: movdqa %xmm1, %xmm6
-; SSSE3-NEXT: pxor %xmm0, %xmm6
+; SSSE3-NEXT: pxor %xmm7, %xmm6
 ; SSSE3-NEXT: psubd %xmm10, %xmm1
-; SSSE3-NEXT: por %xmm0, %xmm10
-; SSSE3-NEXT: pcmpgtd %xmm6, %xmm10
-; SSSE3-NEXT: pshufb %xmm9, %xmm10
-; SSSE3-NEXT: punpckldq {{.*#+}} xmm10 = xmm10[0],xmm5[0],xmm10[1],xmm5[1]
-; SSSE3-NEXT: movdqa %xmm4, %xmm5
-; SSSE3-NEXT: pxor %xmm0, %xmm5
-; SSSE3-NEXT: psubd %xmm7, %xmm4
-; SSSE3-NEXT: por %xmm0, %xmm7
-; SSSE3-NEXT: pcmpgtd %xmm5, %xmm7
-; SSSE3-NEXT: movdqa {{.*#+}} xmm5 = <u,u,u,u,0,4,8,12,u,u,u,u,u,u,u,u>
-; SSSE3-NEXT: pshufb %xmm5, %xmm7
-; SSSE3-NEXT: movdqa %xmm3, %xmm6
-; SSSE3-NEXT: pxor %xmm0, %xmm6
-; SSSE3-NEXT: por %xmm8, %xmm0
+; SSSE3-NEXT: movdqa %xmm10, %xmm0
+; SSSE3-NEXT: por %xmm7, %xmm0
 ; SSSE3-NEXT: pcmpgtd %xmm6, %xmm0
-; SSSE3-NEXT: pshufb %xmm5, %xmm0
-; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1]
-; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm10[0],xmm0[1]
+; SSSE3-NEXT: pshufb %xmm9, %xmm0
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1]
+; SSSE3-NEXT: movdqa %xmm4, %xmm5
+; SSSE3-NEXT: pxor %xmm7, %xmm5
+; SSSE3-NEXT: psubd %xmm11, %xmm4
+; SSSE3-NEXT: por %xmm7, %xmm11
+; SSSE3-NEXT: pcmpgtd %xmm5, %xmm11
+; SSSE3-NEXT: movdqa {{.*#+}} xmm5 = <u,u,u,u,0,4,8,12,u,u,u,u,u,u,u,u>
+; SSSE3-NEXT: pshufb %xmm5, %xmm11
+; SSSE3-NEXT: movdqa %xmm3, %xmm6
+; SSSE3-NEXT: pxor %xmm7, %xmm6
+; SSSE3-NEXT: por %xmm8, %xmm7
+; SSSE3-NEXT: pcmpgtd %xmm6, %xmm7
+; SSSE3-NEXT: pshufb %xmm5, %xmm7
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm7 = xmm7[0],xmm11[0],xmm7[1],xmm11[1]
+; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm7[1]
 ; SSSE3-NEXT: psubd %xmm8, %xmm3
 ; SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
 ; SSSE3-NEXT: pand %xmm5, %xmm4


@@ -82,17 +82,16 @@ define <4 x i32> @test5(<4 x i32> %x) {
 ; X86: # %bb.0:
 ; X86-NEXT: movdqa %xmm0, %xmm1
 ; X86-NEXT: psrad $3, %xmm1
-; X86-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; X86-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
 ; X86-NEXT: movdqa {{.*#+}} xmm2 = [2863311531,2863311531,3264175145,3264175145]
-; X86-NEXT: movapd %xmm0, %xmm1
-; X86-NEXT: pmuludq %xmm2, %xmm1
-; X86-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
-; X86-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; X86-NEXT: movapd %xmm1, %xmm0
+; X86-NEXT: pmuludq %xmm2, %xmm0
+; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; X86-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,3,3]
 ; X86-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
-; X86-NEXT: pmuludq %xmm0, %xmm2
-; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
-; X86-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; X86-NEXT: movdqa %xmm1, %xmm0
+; X86-NEXT: pmuludq %xmm1, %xmm2
+; X86-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
+; X86-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: test5:


@ -29,9 +29,8 @@ define void @test1(<2 x double>* %r, <2 x double>* %A, double %B) nounwind {
;
; X64-SSE-LABEL: test1:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movapd (%rsi), %xmm1
; X64-SSE-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; X64-SSE-NEXT: movapd %xmm1, (%rdi)
; X64-SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],mem[1]
; X64-SSE-NEXT: movapd %xmm0, (%rdi)
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: test1:


@@ -149,14 +149,12 @@ entry:
 define <2 x double> @vsel_double(<2 x double> %v1, <2 x double> %v2) {
 ; SSE2-LABEL: vsel_double:
 ; SSE2: # %bb.0: # %entry
-; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSE2-NEXT: movapd %xmm1, %xmm0
+; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
 ; SSE2-NEXT: retq
 ;
 ; SSSE3-LABEL: vsel_double:
 ; SSSE3: # %bb.0: # %entry
-; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSSE3-NEXT: movapd %xmm1, %xmm0
+; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
 ; SSSE3-NEXT: retq
 ;
 ; SSE41-LABEL: vsel_double:
@@ -176,14 +174,12 @@ entry:
 define <2 x i64> @vsel_i64(<2 x i64> %v1, <2 x i64> %v2) {
 ; SSE2-LABEL: vsel_i64:
 ; SSE2: # %bb.0: # %entry
-; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSE2-NEXT: movapd %xmm1, %xmm0
+; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
 ; SSE2-NEXT: retq
 ;
 ; SSSE3-LABEL: vsel_i64:
 ; SSSE3: # %bb.0: # %entry
-; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSSE3-NEXT: movapd %xmm1, %xmm0
+; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
 ; SSSE3-NEXT: retq
 ;
 ; SSE41-LABEL: vsel_i64:
@@ -340,20 +336,16 @@ define <8 x double> @vsel_double8(<8 x double> %v1, <8 x double> %v2) {
 ; SSE2: # %bb.0: # %entry
 ; SSE2-NEXT: movaps %xmm7, %xmm3
 ; SSE2-NEXT: movaps %xmm5, %xmm1
-; SSE2-NEXT: movsd {{.*#+}} xmm4 = xmm0[0],xmm4[1]
-; SSE2-NEXT: movsd {{.*#+}} xmm6 = xmm2[0],xmm6[1]
-; SSE2-NEXT: movapd %xmm4, %xmm0
-; SSE2-NEXT: movapd %xmm6, %xmm2
+; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm4[1]
+; SSE2-NEXT: shufpd {{.*#+}} xmm2 = xmm2[0],xmm6[1]
 ; SSE2-NEXT: retq
 ;
 ; SSSE3-LABEL: vsel_double8:
 ; SSSE3: # %bb.0: # %entry
 ; SSSE3-NEXT: movaps %xmm7, %xmm3
 ; SSSE3-NEXT: movaps %xmm5, %xmm1
-; SSSE3-NEXT: movsd {{.*#+}} xmm4 = xmm0[0],xmm4[1]
-; SSSE3-NEXT: movsd {{.*#+}} xmm6 = xmm2[0],xmm6[1]
-; SSSE3-NEXT: movapd %xmm4, %xmm0
-; SSSE3-NEXT: movapd %xmm6, %xmm2
+; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm4[1]
+; SSSE3-NEXT: shufpd {{.*#+}} xmm2 = xmm2[0],xmm6[1]
 ; SSSE3-NEXT: retq
 ;
 ; SSE41-LABEL: vsel_double8:
@@ -379,20 +371,16 @@ define <8 x i64> @vsel_i648(<8 x i64> %v1, <8 x i64> %v2) {
 ; SSE2: # %bb.0: # %entry
 ; SSE2-NEXT: movaps %xmm7, %xmm3
 ; SSE2-NEXT: movaps %xmm5, %xmm1
-; SSE2-NEXT: movsd {{.*#+}} xmm4 = xmm0[0],xmm4[1]
-; SSE2-NEXT: movsd {{.*#+}} xmm6 = xmm2[0],xmm6[1]
-; SSE2-NEXT: movapd %xmm4, %xmm0
-; SSE2-NEXT: movapd %xmm6, %xmm2
+; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm4[1]
+; SSE2-NEXT: shufpd {{.*#+}} xmm2 = xmm2[0],xmm6[1]
 ; SSE2-NEXT: retq
 ;
 ; SSSE3-LABEL: vsel_i648:
 ; SSSE3: # %bb.0: # %entry
 ; SSSE3-NEXT: movaps %xmm7, %xmm3
 ; SSSE3-NEXT: movaps %xmm5, %xmm1
-; SSSE3-NEXT: movsd {{.*#+}} xmm4 = xmm0[0],xmm4[1]
-; SSSE3-NEXT: movsd {{.*#+}} xmm6 = xmm2[0],xmm6[1]
-; SSSE3-NEXT: movapd %xmm4, %xmm0
-; SSSE3-NEXT: movapd %xmm6, %xmm2
+; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm4[1]
+; SSSE3-NEXT: shufpd {{.*#+}} xmm2 = xmm2[0],xmm6[1]
 ; SSSE3-NEXT: retq
 ;
 ; SSE41-LABEL: vsel_i648:
@@ -416,18 +404,14 @@ entry:
 define <4 x double> @vsel_double4(<4 x double> %v1, <4 x double> %v2) {
 ; SSE2-LABEL: vsel_double4:
 ; SSE2: # %bb.0: # %entry
-; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
-; SSE2-NEXT: movsd {{.*#+}} xmm3 = xmm1[0],xmm3[1]
-; SSE2-NEXT: movapd %xmm2, %xmm0
-; SSE2-NEXT: movapd %xmm3, %xmm1
+; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm2[1]
+; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm3[1]
 ; SSE2-NEXT: retq
 ;
 ; SSSE3-LABEL: vsel_double4:
 ; SSSE3: # %bb.0: # %entry
-; SSSE3-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
-; SSSE3-NEXT: movsd {{.*#+}} xmm3 = xmm1[0],xmm3[1]
-; SSSE3-NEXT: movapd %xmm2, %xmm0
-; SSSE3-NEXT: movapd %xmm3, %xmm1
+; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm2[1]
+; SSSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm3[1]
 ; SSSE3-NEXT: retq
 ;
 ; SSE41-LABEL: vsel_double4:
@@ -529,15 +513,13 @@ define <4 x double> @constant_blendvpd_avx(<4 x double> %xy, <4 x double> %ab) {
 ; SSE2-LABEL: constant_blendvpd_avx:
 ; SSE2: # %bb.0: # %entry
 ; SSE2-NEXT: movaps %xmm2, %xmm0
-; SSE2-NEXT: movsd {{.*#+}} xmm3 = xmm1[0],xmm3[1]
-; SSE2-NEXT: movapd %xmm3, %xmm1
+; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm3[1]
 ; SSE2-NEXT: retq
 ;
 ; SSSE3-LABEL: constant_blendvpd_avx:
 ; SSSE3: # %bb.0: # %entry
 ; SSSE3-NEXT: movaps %xmm2, %xmm0
-; SSSE3-NEXT: movsd {{.*#+}} xmm3 = xmm1[0],xmm3[1]
-; SSSE3-NEXT: movapd %xmm3, %xmm1
+; SSSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm3[1]
 ; SSSE3-NEXT: retq
 ;
 ; SSE41-LABEL: constant_blendvpd_avx:
@@ -713,14 +695,12 @@ entry:
 define <4 x double> @blend_shufflevector_4xdouble(<4 x double> %a, <4 x double> %b) {
 ; SSE2-LABEL: blend_shufflevector_4xdouble:
 ; SSE2: # %bb.0: # %entry
-; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
-; SSE2-NEXT: movapd %xmm2, %xmm0
+; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm2[1]
 ; SSE2-NEXT: retq
 ;
 ; SSSE3-LABEL: blend_shufflevector_4xdouble:
 ; SSSE3: # %bb.0: # %entry
-; SSSE3-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
-; SSSE3-NEXT: movapd %xmm2, %xmm0
+; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm2[1]
 ; SSSE3-NEXT: retq
 ;
 ; SSE41-LABEL: blend_shufflevector_4xdouble:


@@ -145,20 +145,21 @@ define <2 x i32> @var_shift_v2i32(<2 x i32> %a, <2 x i32> %b) nounwind {
 ; X32-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
 ; X32-SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
 ; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm1
-; X32-SSE-NEXT: movdqa {{.*#+}} xmm3 = [0,2147483648,0,2147483648]
-; X32-SSE-NEXT: movdqa %xmm3, %xmm0
-; X32-SSE-NEXT: psrlq %xmm1, %xmm0
+; X32-SSE-NEXT: movdqa {{.*#+}} xmm0 = [0,2147483648,0,2147483648]
+; X32-SSE-NEXT: movdqa %xmm0, %xmm3
+; X32-SSE-NEXT: psrlq %xmm1, %xmm3
 ; X32-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm1[2,3,0,1]
 ; X32-SSE-NEXT: xorps %xmm5, %xmm5
 ; X32-SSE-NEXT: movss {{.*#+}} xmm5 = xmm4[0],xmm5[1,2,3]
-; X32-SSE-NEXT: psrlq %xmm5, %xmm3
-; X32-SSE-NEXT: movsd {{.*#+}} xmm3 = xmm0[0],xmm3[1]
-; X32-SSE-NEXT: movdqa %xmm2, %xmm0
 ; X32-SSE-NEXT: psrlq %xmm5, %xmm0
+; X32-SSE-NEXT: movsd {{.*#+}} xmm0 = xmm3[0],xmm0[1]
+; X32-SSE-NEXT: movdqa %xmm2, %xmm3
+; X32-SSE-NEXT: psrlq %xmm5, %xmm3
 ; X32-SSE-NEXT: psrlq %xmm1, %xmm2
-; X32-SSE-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
-; X32-SSE-NEXT: xorpd %xmm3, %xmm0
-; X32-SSE-NEXT: psubq %xmm3, %xmm0
+; X32-SSE-NEXT: shufpd {{.*#+}} xmm2 = xmm2[0],xmm3[1]
+; X32-SSE-NEXT: xorpd %xmm0, %xmm2
+; X32-SSE-NEXT: psubq %xmm0, %xmm2
+; X32-SSE-NEXT: movdqa %xmm2, %xmm0
 ; X32-SSE-NEXT: retl
 %shift = ashr <2 x i32> %a, %b
 ret <2 x i32> %shift
@@ -1057,21 +1058,22 @@ define <2 x i32> @splatvar_shift_v2i32(<2 x i32> %a, <2 x i32> %b) nounwind {
 ; X32-SSE-NEXT: psrad $31, %xmm0
 ; X32-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
 ; X32-SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
-; X32-SSE-NEXT: movdqa {{.*#+}} xmm3 = [4294967295,0,4294967295,0]
-; X32-SSE-NEXT: pand %xmm1, %xmm3
-; X32-SSE-NEXT: movdqa {{.*#+}} xmm4 = [0,2147483648,0,2147483648]
-; X32-SSE-NEXT: movdqa %xmm4, %xmm0
-; X32-SSE-NEXT: psrlq %xmm3, %xmm0
+; X32-SSE-NEXT: movdqa {{.*#+}} xmm0 = [4294967295,0,4294967295,0]
+; X32-SSE-NEXT: pand %xmm1, %xmm0
+; X32-SSE-NEXT: movdqa {{.*#+}} xmm3 = [0,2147483648,0,2147483648]
+; X32-SSE-NEXT: movdqa %xmm3, %xmm4
+; X32-SSE-NEXT: psrlq %xmm0, %xmm4
 ; X32-SSE-NEXT: xorps %xmm5, %xmm5
 ; X32-SSE-NEXT: movss {{.*#+}} xmm5 = xmm1[0],xmm5[1,2,3]
-; X32-SSE-NEXT: psrlq %xmm5, %xmm4
-; X32-SSE-NEXT: movsd {{.*#+}} xmm4 = xmm0[0],xmm4[1]
+; X32-SSE-NEXT: psrlq %xmm5, %xmm3
+; X32-SSE-NEXT: movsd {{.*#+}} xmm3 = xmm4[0],xmm3[1]
+; X32-SSE-NEXT: movdqa %xmm2, %xmm1
+; X32-SSE-NEXT: psrlq %xmm5, %xmm1
+; X32-SSE-NEXT: psrlq %xmm0, %xmm2
+; X32-SSE-NEXT: shufpd {{.*#+}} xmm2 = xmm2[0],xmm1[1]
+; X32-SSE-NEXT: xorpd %xmm3, %xmm2
+; X32-SSE-NEXT: psubq %xmm3, %xmm2
 ; X32-SSE-NEXT: movdqa %xmm2, %xmm0
-; X32-SSE-NEXT: psrlq %xmm5, %xmm0
-; X32-SSE-NEXT: psrlq %xmm3, %xmm2
-; X32-SSE-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
-; X32-SSE-NEXT: xorpd %xmm4, %xmm0
-; X32-SSE-NEXT: psubq %xmm4, %xmm0
 ; X32-SSE-NEXT: retl
 %splat = shufflevector <2 x i32> %b, <2 x i32> undef, <2 x i32> zeroinitializer
 %shift = ashr <2 x i32> %a, %splat


@@ -222,20 +222,17 @@ define <2 x double> @shuffle_v2f64_33(<2 x double> %a, <2 x double> %b) {
 define <2 x double> @shuffle_v2f64_03(<2 x double> %a, <2 x double> %b) {
 ; SSE2-LABEL: shuffle_v2f64_03:
 ; SSE2: # %bb.0:
-; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSE2-NEXT: movapd %xmm1, %xmm0
+; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
 ; SSE2-NEXT: retq
 ;
 ; SSE3-LABEL: shuffle_v2f64_03:
 ; SSE3: # %bb.0:
-; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSE3-NEXT: movapd %xmm1, %xmm0
+; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
 ; SSE3-NEXT: retq
 ;
 ; SSSE3-LABEL: shuffle_v2f64_03:
 ; SSSE3: # %bb.0:
-; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSSE3-NEXT: movapd %xmm1, %xmm0
+; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
 ; SSSE3-NEXT: retq
 ;
 ; SSE41-LABEL: shuffle_v2f64_03:
@@ -351,20 +348,17 @@ define <2 x i64> @shuffle_v2i64_02_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64
 define <2 x i64> @shuffle_v2i64_03(<2 x i64> %a, <2 x i64> %b) {
 ; SSE2-LABEL: shuffle_v2i64_03:
 ; SSE2: # %bb.0:
-; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSE2-NEXT: movapd %xmm1, %xmm0
+; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
 ; SSE2-NEXT: retq
 ;
 ; SSE3-LABEL: shuffle_v2i64_03:
 ; SSE3: # %bb.0:
-; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSE3-NEXT: movapd %xmm1, %xmm0
+; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
 ; SSE3-NEXT: retq
 ;
 ; SSSE3-LABEL: shuffle_v2i64_03:
 ; SSSE3: # %bb.0:
-; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSSE3-NEXT: movapd %xmm1, %xmm0
+; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
 ; SSSE3-NEXT: retq
 ;
 ; SSE41-LABEL: shuffle_v2i64_03:
@@ -382,20 +376,20 @@ define <2 x i64> @shuffle_v2i64_03(<2 x i64> %a, <2 x i64> %b) {
 define <2 x i64> @shuffle_v2i64_03_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
 ; SSE2-LABEL: shuffle_v2i64_03_copy:
 ; SSE2: # %bb.0:
-; SSE2-NEXT: movapd %xmm2, %xmm0
-; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; SSE2-NEXT: movapd %xmm1, %xmm0
+; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm2[1]
 ; SSE2-NEXT: retq
 ;
 ; SSE3-LABEL: shuffle_v2i64_03_copy:
 ; SSE3: # %bb.0:
-; SSE3-NEXT: movapd %xmm2, %xmm0
-; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; SSE3-NEXT: movapd %xmm1, %xmm0
+; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm2[1]
 ; SSE3-NEXT: retq
 ;
 ; SSSE3-LABEL: shuffle_v2i64_03_copy:
 ; SSSE3: # %bb.0:
-; SSSE3-NEXT: movapd %xmm2, %xmm0
-; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; SSSE3-NEXT: movapd %xmm1, %xmm0
+; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm2[1]
 ; SSSE3-NEXT: retq
 ;
 ; SSE41-LABEL: shuffle_v2i64_03_copy:
@@ -1085,20 +1079,17 @@ define <2 x i64> @insert_mem_hi_v2i64(i64* %ptr, <2 x i64> %b) {
 define <2 x double> @insert_reg_lo_v2f64(double %a, <2 x double> %b) {
 ; SSE2-LABEL: insert_reg_lo_v2f64:
 ; SSE2: # %bb.0:
-; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSE2-NEXT: movapd %xmm1, %xmm0
+; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
 ; SSE2-NEXT: retq
 ;
 ; SSE3-LABEL: insert_reg_lo_v2f64:
 ; SSE3: # %bb.0:
-; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSE3-NEXT: movapd %xmm1, %xmm0
+; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
 ; SSE3-NEXT: retq
 ;
 ; SSSE3-LABEL: insert_reg_lo_v2f64:
 ; SSSE3: # %bb.0:
-; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSSE3-NEXT: movapd %xmm1, %xmm0
+; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
 ; SSSE3-NEXT: retq
 ;
 ; SSE41-LABEL: insert_reg_lo_v2f64:


@@ -2255,20 +2255,17 @@ define <4 x i32> @insert_mem_hi_v4i32(<2 x i32>* %ptr, <4 x i32> %b) {
 define <4 x float> @insert_reg_lo_v4f32(double %a, <4 x float> %b) {
 ; SSE2-LABEL: insert_reg_lo_v4f32:
 ; SSE2: # %bb.0:
-; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSE2-NEXT: movapd %xmm1, %xmm0
+; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
 ; SSE2-NEXT: retq
 ;
 ; SSE3-LABEL: insert_reg_lo_v4f32:
 ; SSE3: # %bb.0:
-; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSE3-NEXT: movapd %xmm1, %xmm0
+; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
 ; SSE3-NEXT: retq
 ;
 ; SSSE3-LABEL: insert_reg_lo_v4f32:
 ; SSSE3: # %bb.0:
-; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSSE3-NEXT: movapd %xmm1, %xmm0
+; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
 ; SSSE3-NEXT: retq
 ;
 ; SSE41-LABEL: insert_reg_lo_v4f32:


@@ -1260,9 +1260,9 @@ define <8 x i16> @shuffle_v8i16_443aXXXX(<8 x i16> %a, <8 x i16> %b) {
 define <8 x i16> @shuffle_v8i16_032dXXXX(<8 x i16> %a, <8 x i16> %b) {
 ; SSE2-LABEL: shuffle_v8i16_032dXXXX:
 ; SSE2: # %bb.0:
-; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,0]
-; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm1[0,1,2,3,6,5,6,7]
+; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,0]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,6,7]
 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7]
 ; SSE2-NEXT: retq
@@ -1459,9 +1459,9 @@ define <8 x i16> @shuffle_v8i16_cde3XXXX(<8 x i16> %a, <8 x i16> %b) {
 define <8 x i16> @shuffle_v8i16_012dcde3(<8 x i16> %a, <8 x i16> %b) {
 ; SSE2-LABEL: shuffle_v8i16_012dcde3:
 ; SSE2: # %bb.0:
-; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3,2,1]
-; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[3,1,2,0,4,5,6,7]
+; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,3,2,1]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7]
 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,5,7]


@@ -43,8 +43,7 @@ define <16 x i8> @combine_vpshufb_as_movq(<16 x i8> %a0) {
 define <2 x double> @combine_pshufb_as_movsd(<2 x double> %a0, <2 x double> %a1) {
 ; SSSE3-LABEL: combine_pshufb_as_movsd:
 ; SSSE3: # %bb.0:
-; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSSE3-NEXT: movapd %xmm1, %xmm0
+; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
 ; SSSE3-NEXT: retq
 ;
 ; SSE41-LABEL: combine_pshufb_as_movsd:
@@ -669,8 +668,7 @@ declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone
 define <16 x i8> @combine_pshufb_pshufb_or_as_blend(<16 x i8> %a0, <16 x i8> %a1) {
 ; SSSE3-LABEL: combine_pshufb_pshufb_or_as_blend:
 ; SSSE3: # %bb.0:
-; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSSE3-NEXT: movapd %xmm1, %xmm0
+; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
 ; SSSE3-NEXT: retq
 ;
 ; SSE41-LABEL: combine_pshufb_pshufb_or_as_blend:


@@ -2132,14 +2132,12 @@ define <4 x float> @combine_undef_input_test4(<4 x float> %a, <4 x float> %b) {
 define <4 x float> @combine_undef_input_test5(<4 x float> %a, <4 x float> %b) {
 ; SSE2-LABEL: combine_undef_input_test5:
 ; SSE2: # %bb.0:
-; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSE2-NEXT: movapd %xmm1, %xmm0
+; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
 ; SSE2-NEXT: retq
 ;
 ; SSSE3-LABEL: combine_undef_input_test5:
 ; SSSE3: # %bb.0:
-; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSSE3-NEXT: movapd %xmm1, %xmm0
+; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
 ; SSSE3-NEXT: retq
 ;
 ; SSE41-LABEL: combine_undef_input_test5:
@@ -2316,14 +2314,12 @@ define <4 x float> @combine_undef_input_test14(<4 x float> %a, <4 x float> %b) {
 define <4 x float> @combine_undef_input_test15(<4 x float> %a, <4 x float> %b) {
 ; SSE2-LABEL: combine_undef_input_test15:
 ; SSE2: # %bb.0:
-; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSE2-NEXT: movapd %xmm1, %xmm0
+; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
 ; SSE2-NEXT: retq
 ;
 ; SSSE3-LABEL: combine_undef_input_test15:
 ; SSSE3: # %bb.0:
-; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSSE3-NEXT: movapd %xmm1, %xmm0
+; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
 ; SSSE3-NEXT: retq
 ;
 ; SSE41-LABEL: combine_undef_input_test15:


@@ -7,8 +7,7 @@
 define <4 x i32> @test1(<4 x i32> %A, <4 x i32> %B) {
 ; SSE2-LABEL: test1:
 ; SSE2: # %bb.0:
-; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSE2-NEXT: movapd %xmm1, %xmm0
+; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
 ; SSE2-NEXT: retq
 ;
 ; SSE41-LABEL: test1:
@@ -46,8 +45,7 @@ define <4 x i32> @test2(<4 x i32> %A, <4 x i32> %B) {
 define <4 x float> @test3(<4 x float> %A, <4 x float> %B) {
 ; SSE2-LABEL: test3:
 ; SSE2: # %bb.0:
-; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSE2-NEXT: movapd %xmm1, %xmm0
+; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
 ; SSE2-NEXT: retq
 ;
 ; SSE41-LABEL: test3:


@@ -30,8 +30,7 @@ define <4 x float> @test1(<4 x float> %a, <4 x float> %b) {
 define <4 x float> @test2(<4 x float> %a, <4 x float> %b) {
 ; SSE2-LABEL: test2:
 ; SSE2: # %bb.0:
-; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSE2-NEXT: movapd %xmm1, %xmm0
+; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
 ; SSE2-NEXT: retq
 ;
 ; SSE41-LABEL: test2:
@@ -107,8 +106,7 @@ define <8 x i16> @test6(<8 x i16> %a, <8 x i16> %b) {
 define <8 x i16> @test7(<8 x i16> %a, <8 x i16> %b) {
 ; SSE2-LABEL: test7:
 ; SSE2: # %bb.0:
-; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSE2-NEXT: movapd %xmm1, %xmm0
+; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
 ; SSE2-NEXT: retq
 ;
 ; SSE41-LABEL: test7:
@@ -392,8 +390,7 @@ define <4 x i32> @test23(<4 x i32> %a, <4 x i32> %b) {
 define <2 x double> @test24(<2 x double> %a, <2 x double> %b) {
 ; SSE2-LABEL: test24:
 ; SSE2: # %bb.0:
-; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSE2-NEXT: movapd %xmm1, %xmm0
+; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
 ; SSE2-NEXT: retq
 ;
 ; SSE41-LABEL: test24:
@@ -412,8 +409,7 @@ define <2 x double> @test24(<2 x double> %a, <2 x double> %b) {
 define <2 x i64> @test25(<2 x i64> %a, <2 x i64> %b) {
 ; SSE2-LABEL: test25:
 ; SSE2: # %bb.0:
-; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSE2-NEXT: movapd %xmm1, %xmm0
+; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
 ; SSE2-NEXT: retq
 ;
 ; SSE41-LABEL: test25:


@@ -223,9 +223,9 @@ define <2 x i64> @shr2_nosplat(<2 x i64> %A) nounwind {
 ; X32-NEXT: psrlq $8, %xmm2
 ; X32-NEXT: movdqa %xmm0, %xmm1
 ; X32-NEXT: psrlq $1, %xmm1
-; X32-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
-; X32-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
-; X32-NEXT: xorpd %xmm0, %xmm1
+; X32-NEXT: shufpd {{.*#+}} xmm2 = xmm2[0],xmm1[1]
+; X32-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
+; X32-NEXT: xorpd %xmm2, %xmm1
 ; X32-NEXT: movapd %xmm1, %xmm0
 ; X32-NEXT: retl
 ;
@@ -235,9 +235,9 @@ define <2 x i64> @shr2_nosplat(<2 x i64> %A) nounwind {
 ; X64-NEXT: psrlq $8, %xmm2
 ; X64-NEXT: movdqa %xmm0, %xmm1
 ; X64-NEXT: psrlq $1, %xmm1
-; X64-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
-; X64-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
-; X64-NEXT: xorpd %xmm0, %xmm1
+; X64-NEXT: shufpd {{.*#+}} xmm2 = xmm2[0],xmm1[1]
+; X64-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
+; X64-NEXT: xorpd %xmm2, %xmm1
 ; X64-NEXT: movapd %xmm1, %xmm0
 ; X64-NEXT: retq
 entry: