mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 11:13:28 +01:00
[X86] Make movsd commutable to shufpd with a 0x02 immediate on pre-SSE4.1 targets.
This can help avoid a register copy or enable load folding. On SSE4.1 targets we can commute it to an immediate blend (blendpd) instead. I had to make shufpd with a 0x02 immediate commutable as well, since we expect commuting to be reversible. llvm-svn: 365292
This commit is contained in:
parent
8bb7483845
commit
238cd86fbe
@ -1542,20 +1542,39 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
|
||||
case X86::VMOVSDrr:
|
||||
case X86::VMOVSSrr:{
|
||||
// On SSE41 or later we can commute a MOVSS/MOVSD to a BLENDPS/BLENDPD.
|
||||
assert(Subtarget.hasSSE41() && "Commuting MOVSD/MOVSS requires SSE41!");
|
||||
if (Subtarget.hasSSE41()) {
|
||||
unsigned Mask, Opc;
|
||||
switch (MI.getOpcode()) {
|
||||
default: llvm_unreachable("Unreachable!");
|
||||
case X86::MOVSDrr: Opc = X86::BLENDPDrri; Mask = 0x02; break;
|
||||
case X86::MOVSSrr: Opc = X86::BLENDPSrri; Mask = 0x0E; break;
|
||||
case X86::VMOVSDrr: Opc = X86::VBLENDPDrri; Mask = 0x02; break;
|
||||
case X86::VMOVSSrr: Opc = X86::VBLENDPSrri; Mask = 0x0E; break;
|
||||
}
|
||||
|
||||
unsigned Mask, Opc;
|
||||
switch (MI.getOpcode()) {
|
||||
default: llvm_unreachable("Unreachable!");
|
||||
case X86::MOVSDrr: Opc = X86::BLENDPDrri; Mask = 0x02; break;
|
||||
case X86::MOVSSrr: Opc = X86::BLENDPSrri; Mask = 0x0E; break;
|
||||
case X86::VMOVSDrr: Opc = X86::VBLENDPDrri; Mask = 0x02; break;
|
||||
case X86::VMOVSSrr: Opc = X86::VBLENDPSrri; Mask = 0x0E; break;
|
||||
auto &WorkingMI = cloneIfNew(MI);
|
||||
WorkingMI.setDesc(get(Opc));
|
||||
WorkingMI.addOperand(MachineOperand::CreateImm(Mask));
|
||||
return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
|
||||
OpIdx1, OpIdx2);
|
||||
}
|
||||
|
||||
// Convert to SHUFPD.
|
||||
assert(MI.getOpcode() == X86::MOVSDrr &&
|
||||
"Can only commute MOVSDrr without SSE4.1");
|
||||
|
||||
auto &WorkingMI = cloneIfNew(MI);
|
||||
WorkingMI.setDesc(get(Opc));
|
||||
WorkingMI.addOperand(MachineOperand::CreateImm(Mask));
|
||||
WorkingMI.setDesc(get(X86::SHUFPDrri));
|
||||
WorkingMI.addOperand(MachineOperand::CreateImm(0x02));
|
||||
return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
|
||||
OpIdx1, OpIdx2);
|
||||
}
|
||||
case X86::SHUFPDrri: {
|
||||
// Commute to MOVSD.
|
||||
assert(MI.getOperand(3).getImm() == 0x02 && "Unexpected immediate!");
|
||||
auto &WorkingMI = cloneIfNew(MI);
|
||||
WorkingMI.setDesc(get(X86::MOVSDrr));
|
||||
WorkingMI.RemoveOperand(3);
|
||||
return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
|
||||
OpIdx1, OpIdx2);
|
||||
}
|
||||
@ -1874,13 +1893,18 @@ bool X86InstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1,
|
||||
}
|
||||
return false;
|
||||
}
|
||||
case X86::MOVSDrr:
|
||||
case X86::MOVSSrr:
|
||||
case X86::VMOVSDrr:
|
||||
case X86::VMOVSSrr:
|
||||
// X86::MOVSDrr is always commutable. MOVSS is only commutable if we can
|
||||
// form sse4.1 blend. We assume VMOVSSrr/VMOVSDrr is always commutable since
|
||||
// AVX implies sse4.1.
|
||||
if (Subtarget.hasSSE41())
|
||||
return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
|
||||
return false;
|
||||
case X86::SHUFPDrri:
|
||||
// We can commute this to MOVSD.
|
||||
if (MI.getOperand(3).getImm() == 0x02)
|
||||
return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
|
||||
return false;
|
||||
case X86::MOVHLPSrr:
|
||||
case X86::UNPCKHPDrr:
|
||||
case X86::VMOVHLPSrr:
|
||||
|
@ -1951,12 +1951,14 @@ let Predicates = [UseSSE1] in {
|
||||
/// sse12_shuffle - sse 1 & 2 fp shuffle instructions
|
||||
multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop,
|
||||
ValueType vt, string asm, PatFrag mem_frag,
|
||||
X86FoldableSchedWrite sched, Domain d> {
|
||||
X86FoldableSchedWrite sched, Domain d,
|
||||
bit IsCommutable = 0> {
|
||||
def rmi : PIi8<0xC6, MRMSrcMem, (outs RC:$dst),
|
||||
(ins RC:$src1, x86memop:$src2, u8imm:$src3), asm,
|
||||
[(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2),
|
||||
(i8 imm:$src3))))], d>,
|
||||
Sched<[sched.Folded, sched.ReadAfterFold]>;
|
||||
let isCommutable = IsCommutable in
|
||||
def rri : PIi8<0xC6, MRMSrcReg, (outs RC:$dst),
|
||||
(ins RC:$src1, RC:$src2, u8imm:$src3), asm,
|
||||
[(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2,
|
||||
@ -1988,7 +1990,7 @@ let Constraints = "$src1 = $dst" in {
|
||||
memopv4f32, SchedWriteFShuffle.XMM, SSEPackedSingle>, PS;
|
||||
defm SHUFPD : sse12_shuffle<VR128, f128mem, v2f64,
|
||||
"shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
|
||||
memopv2f64, SchedWriteFShuffle.XMM, SSEPackedDouble>, PD;
|
||||
memopv2f64, SchedWriteFShuffle.XMM, SSEPackedDouble, 1>, PD;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -65,9 +65,7 @@ entry:
|
||||
define <2 x double> @test_negative_zero_2(<2 x double> %A) {
|
||||
; SSE2-LABEL: test_negative_zero_2:
|
||||
; SSE2: # %bb.0: # %entry
|
||||
; SSE2-NEXT: movapd {{.*#+}} xmm1 = <u,-0.0E+0>
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
|
||||
; SSE2-NEXT: movapd %xmm1, %xmm0
|
||||
; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],mem[1]
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: test_negative_zero_2:
|
||||
|
@ -9,8 +9,7 @@
|
||||
define <2 x double> @insert_f64(double %a0, <2 x double> %a1) {
|
||||
; SSE2-LABEL: insert_f64:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
|
||||
; SSE2-NEXT: movapd %xmm1, %xmm0
|
||||
; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: insert_f64:
|
||||
|
@ -1529,8 +1529,7 @@ define <2 x i64> @combine_vec_sdiv_by_pow2b_v2i64(<2 x i64> %x) {
|
||||
; SSE2-NEXT: psrlq $2, %xmm1
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
|
||||
; SSE2-NEXT: movapd %xmm1, %xmm0
|
||||
; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: combine_vec_sdiv_by_pow2b_v2i64:
|
||||
@ -1616,24 +1615,23 @@ define <4 x i64> @combine_vec_sdiv_by_pow2b_v4i64(<4 x i64> %x) {
|
||||
; SSE2-NEXT: psrlq $2, %xmm2
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
|
||||
; SSE2-NEXT: movdqa %xmm1, %xmm0
|
||||
; SSE2-NEXT: psrad $31, %xmm0
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
|
||||
; SSE2-NEXT: movdqa %xmm3, %xmm0
|
||||
; SSE2-NEXT: psrlq $61, %xmm0
|
||||
; SSE2-NEXT: psrlq $60, %xmm3
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm3 = xmm0[0],xmm3[1]
|
||||
; SSE2-NEXT: paddq %xmm1, %xmm3
|
||||
; SSE2-NEXT: movdqa %xmm3, %xmm0
|
||||
; SSE2-NEXT: psrlq $3, %xmm0
|
||||
; SSE2-NEXT: psrlq $4, %xmm3
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm3 = xmm0[0],xmm3[1]
|
||||
; SSE2-NEXT: movapd {{.*#+}} xmm0 = [1152921504606846976,576460752303423488]
|
||||
; SSE2-NEXT: xorpd %xmm0, %xmm3
|
||||
; SSE2-NEXT: psubq %xmm0, %xmm3
|
||||
; SSE2-NEXT: movapd %xmm2, %xmm0
|
||||
; SSE2-NEXT: movdqa %xmm3, %xmm1
|
||||
; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm2[1]
|
||||
; SSE2-NEXT: movdqa %xmm1, %xmm2
|
||||
; SSE2-NEXT: psrad $31, %xmm2
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
|
||||
; SSE2-NEXT: movdqa %xmm2, %xmm3
|
||||
; SSE2-NEXT: psrlq $61, %xmm3
|
||||
; SSE2-NEXT: psrlq $60, %xmm2
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm3[0],xmm2[1]
|
||||
; SSE2-NEXT: paddq %xmm1, %xmm2
|
||||
; SSE2-NEXT: movdqa %xmm2, %xmm1
|
||||
; SSE2-NEXT: psrlq $3, %xmm1
|
||||
; SSE2-NEXT: psrlq $4, %xmm2
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
|
||||
; SSE2-NEXT: movapd {{.*#+}} xmm1 = [1152921504606846976,576460752303423488]
|
||||
; SSE2-NEXT: xorpd %xmm1, %xmm2
|
||||
; SSE2-NEXT: psubq %xmm1, %xmm2
|
||||
; SSE2-NEXT: movdqa %xmm2, %xmm1
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: combine_vec_sdiv_by_pow2b_v4i64:
|
||||
@ -1745,29 +1743,28 @@ define <4 x i64> @combine_vec_sdiv_by_pow2b_v4i64(<4 x i64> %x) {
|
||||
define <8 x i64> @combine_vec_sdiv_by_pow2b_v8i64(<8 x i64> %x) {
|
||||
; SSE2-LABEL: combine_vec_sdiv_by_pow2b_v8i64:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: movdqa %xmm0, %xmm4
|
||||
; SSE2-NEXT: psrad $31, %xmm4
|
||||
; SSE2-NEXT: psrlq $62, %xmm4
|
||||
; SSE2-NEXT: paddq %xmm0, %xmm4
|
||||
; SSE2-NEXT: movdqa %xmm4, %xmm5
|
||||
; SSE2-NEXT: psrad $2, %xmm5
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,3,2,3]
|
||||
; SSE2-NEXT: psrlq $2, %xmm4
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3]
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1]
|
||||
; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm4[1]
|
||||
; SSE2-NEXT: movdqa %xmm2, %xmm4
|
||||
; SSE2-NEXT: movdqa %xmm0, %xmm2
|
||||
; SSE2-NEXT: psrad $31, %xmm0
|
||||
; SSE2-NEXT: psrlq $62, %xmm0
|
||||
; SSE2-NEXT: paddq %xmm2, %xmm0
|
||||
; SSE2-NEXT: movdqa %xmm0, %xmm5
|
||||
; SSE2-NEXT: psrad $31, %xmm4
|
||||
; SSE2-NEXT: psrlq $62, %xmm4
|
||||
; SSE2-NEXT: paddq %xmm2, %xmm4
|
||||
; SSE2-NEXT: movdqa %xmm4, %xmm5
|
||||
; SSE2-NEXT: psrad $2, %xmm5
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,3,2,3]
|
||||
; SSE2-NEXT: psrlq $2, %xmm0
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1]
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
|
||||
; SSE2-NEXT: movdqa %xmm4, %xmm2
|
||||
; SSE2-NEXT: psrad $31, %xmm2
|
||||
; SSE2-NEXT: psrlq $62, %xmm2
|
||||
; SSE2-NEXT: paddq %xmm4, %xmm2
|
||||
; SSE2-NEXT: movdqa %xmm2, %xmm5
|
||||
; SSE2-NEXT: psrad $2, %xmm5
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,3,2,3]
|
||||
; SSE2-NEXT: psrlq $2, %xmm2
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1]
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm4[0],xmm2[1]
|
||||
; SSE2-NEXT: psrlq $2, %xmm4
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3]
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1]
|
||||
; SSE2-NEXT: shufpd {{.*#+}} xmm2 = xmm2[0],xmm4[1]
|
||||
; SSE2-NEXT: movdqa %xmm1, %xmm4
|
||||
; SSE2-NEXT: psrad $31, %xmm4
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
|
||||
|
@ -657,46 +657,47 @@ define <16 x i8> @test14(<16 x i8> %x, <16 x i32> %y) nounwind {
|
||||
;
|
||||
; SSSE3-LABEL: test14:
|
||||
; SSSE3: # %bb.0: # %vector.ph
|
||||
; SSSE3-NEXT: movdqa %xmm0, %xmm5
|
||||
; SSSE3-NEXT: pxor %xmm0, %xmm0
|
||||
; SSSE3-NEXT: movdqa %xmm5, %xmm7
|
||||
; SSSE3-NEXT: punpckhbw {{.*#+}} xmm7 = xmm7[8],xmm0[8],xmm7[9],xmm0[9],xmm7[10],xmm0[10],xmm7[11],xmm0[11],xmm7[12],xmm0[12],xmm7[13],xmm0[13],xmm7[14],xmm0[14],xmm7[15],xmm0[15]
|
||||
; SSSE3-NEXT: movdqa %xmm7, %xmm8
|
||||
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm8 = xmm8[0],xmm0[0],xmm8[1],xmm0[1],xmm8[2],xmm0[2],xmm8[3],xmm0[3]
|
||||
; SSSE3-NEXT: punpckhwd {{.*#+}} xmm7 = xmm7[4],xmm0[4],xmm7[5],xmm0[5],xmm7[6],xmm0[6],xmm7[7],xmm0[7]
|
||||
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm0[0],xmm5[1],xmm0[1],xmm5[2],xmm0[2],xmm5[3],xmm0[3],xmm5[4],xmm0[4],xmm5[5],xmm0[5],xmm5[6],xmm0[6],xmm5[7],xmm0[7]
|
||||
; SSSE3-NEXT: movdqa %xmm5, %xmm10
|
||||
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm10 = xmm10[0],xmm0[0],xmm10[1],xmm0[1],xmm10[2],xmm0[2],xmm10[3],xmm0[3]
|
||||
; SSSE3-NEXT: punpckhwd {{.*#+}} xmm5 = xmm5[4],xmm0[4],xmm5[5],xmm0[5],xmm5[6],xmm0[6],xmm5[7],xmm0[7]
|
||||
; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
|
||||
; SSSE3-NEXT: pxor %xmm7, %xmm7
|
||||
; SSSE3-NEXT: movdqa %xmm0, %xmm11
|
||||
; SSSE3-NEXT: punpckhbw {{.*#+}} xmm11 = xmm11[8],xmm7[8],xmm11[9],xmm7[9],xmm11[10],xmm7[10],xmm11[11],xmm7[11],xmm11[12],xmm7[12],xmm11[13],xmm7[13],xmm11[14],xmm7[14],xmm11[15],xmm7[15]
|
||||
; SSSE3-NEXT: movdqa %xmm11, %xmm8
|
||||
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm8 = xmm8[0],xmm7[0],xmm8[1],xmm7[1],xmm8[2],xmm7[2],xmm8[3],xmm7[3]
|
||||
; SSSE3-NEXT: punpckhwd {{.*#+}} xmm11 = xmm11[4],xmm7[4],xmm11[5],xmm7[5],xmm11[6],xmm7[6],xmm11[7],xmm7[7]
|
||||
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1],xmm0[2],xmm7[2],xmm0[3],xmm7[3],xmm0[4],xmm7[4],xmm0[5],xmm7[5],xmm0[6],xmm7[6],xmm0[7],xmm7[7]
|
||||
; SSSE3-NEXT: movdqa %xmm0, %xmm10
|
||||
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm10 = xmm10[0],xmm7[0],xmm10[1],xmm7[1],xmm10[2],xmm7[2],xmm10[3],xmm7[3]
|
||||
; SSSE3-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm7[4],xmm0[5],xmm7[5],xmm0[6],xmm7[6],xmm0[7],xmm7[7]
|
||||
; SSSE3-NEXT: movdqa {{.*#+}} xmm7 = [2147483648,2147483648,2147483648,2147483648]
|
||||
; SSSE3-NEXT: movdqa %xmm2, %xmm9
|
||||
; SSSE3-NEXT: pxor %xmm0, %xmm9
|
||||
; SSSE3-NEXT: psubd %xmm5, %xmm2
|
||||
; SSSE3-NEXT: por %xmm0, %xmm5
|
||||
; SSSE3-NEXT: pxor %xmm7, %xmm9
|
||||
; SSSE3-NEXT: psubd %xmm0, %xmm2
|
||||
; SSSE3-NEXT: movdqa %xmm0, %xmm5
|
||||
; SSSE3-NEXT: por %xmm7, %xmm5
|
||||
; SSSE3-NEXT: pcmpgtd %xmm9, %xmm5
|
||||
; SSSE3-NEXT: movdqa {{.*#+}} xmm9 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
|
||||
; SSSE3-NEXT: pshufb %xmm9, %xmm5
|
||||
; SSSE3-NEXT: movdqa %xmm1, %xmm6
|
||||
; SSSE3-NEXT: pxor %xmm0, %xmm6
|
||||
; SSSE3-NEXT: pxor %xmm7, %xmm6
|
||||
; SSSE3-NEXT: psubd %xmm10, %xmm1
|
||||
; SSSE3-NEXT: por %xmm0, %xmm10
|
||||
; SSSE3-NEXT: pcmpgtd %xmm6, %xmm10
|
||||
; SSSE3-NEXT: pshufb %xmm9, %xmm10
|
||||
; SSSE3-NEXT: punpckldq {{.*#+}} xmm10 = xmm10[0],xmm5[0],xmm10[1],xmm5[1]
|
||||
; SSSE3-NEXT: movdqa %xmm4, %xmm5
|
||||
; SSSE3-NEXT: pxor %xmm0, %xmm5
|
||||
; SSSE3-NEXT: psubd %xmm7, %xmm4
|
||||
; SSSE3-NEXT: por %xmm0, %xmm7
|
||||
; SSSE3-NEXT: pcmpgtd %xmm5, %xmm7
|
||||
; SSSE3-NEXT: movdqa {{.*#+}} xmm5 = <u,u,u,u,0,4,8,12,u,u,u,u,u,u,u,u>
|
||||
; SSSE3-NEXT: pshufb %xmm5, %xmm7
|
||||
; SSSE3-NEXT: movdqa %xmm3, %xmm6
|
||||
; SSSE3-NEXT: pxor %xmm0, %xmm6
|
||||
; SSSE3-NEXT: por %xmm8, %xmm0
|
||||
; SSSE3-NEXT: movdqa %xmm10, %xmm0
|
||||
; SSSE3-NEXT: por %xmm7, %xmm0
|
||||
; SSSE3-NEXT: pcmpgtd %xmm6, %xmm0
|
||||
; SSSE3-NEXT: pshufb %xmm5, %xmm0
|
||||
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1]
|
||||
; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm10[0],xmm0[1]
|
||||
; SSSE3-NEXT: pshufb %xmm9, %xmm0
|
||||
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1]
|
||||
; SSSE3-NEXT: movdqa %xmm4, %xmm5
|
||||
; SSSE3-NEXT: pxor %xmm7, %xmm5
|
||||
; SSSE3-NEXT: psubd %xmm11, %xmm4
|
||||
; SSSE3-NEXT: por %xmm7, %xmm11
|
||||
; SSSE3-NEXT: pcmpgtd %xmm5, %xmm11
|
||||
; SSSE3-NEXT: movdqa {{.*#+}} xmm5 = <u,u,u,u,0,4,8,12,u,u,u,u,u,u,u,u>
|
||||
; SSSE3-NEXT: pshufb %xmm5, %xmm11
|
||||
; SSSE3-NEXT: movdqa %xmm3, %xmm6
|
||||
; SSSE3-NEXT: pxor %xmm7, %xmm6
|
||||
; SSSE3-NEXT: por %xmm8, %xmm7
|
||||
; SSSE3-NEXT: pcmpgtd %xmm6, %xmm7
|
||||
; SSSE3-NEXT: pshufb %xmm5, %xmm7
|
||||
; SSSE3-NEXT: punpckldq {{.*#+}} xmm7 = xmm7[0],xmm11[0],xmm7[1],xmm11[1]
|
||||
; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm7[1]
|
||||
; SSSE3-NEXT: psubd %xmm8, %xmm3
|
||||
; SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
|
||||
; SSSE3-NEXT: pand %xmm5, %xmm4
|
||||
|
@ -82,17 +82,16 @@ define <4 x i32> @test5(<4 x i32> %x) {
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movdqa %xmm0, %xmm1
|
||||
; X86-NEXT: psrad $3, %xmm1
|
||||
; X86-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
|
||||
; X86-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
|
||||
; X86-NEXT: movdqa {{.*#+}} xmm2 = [2863311531,2863311531,3264175145,3264175145]
|
||||
; X86-NEXT: movapd %xmm0, %xmm1
|
||||
; X86-NEXT: pmuludq %xmm2, %xmm1
|
||||
; X86-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
|
||||
; X86-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,3,3]
|
||||
; X86-NEXT: movapd %xmm1, %xmm0
|
||||
; X86-NEXT: pmuludq %xmm2, %xmm0
|
||||
; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
|
||||
; X86-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,3,3]
|
||||
; X86-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
|
||||
; X86-NEXT: pmuludq %xmm0, %xmm2
|
||||
; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
|
||||
; X86-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
|
||||
; X86-NEXT: movdqa %xmm1, %xmm0
|
||||
; X86-NEXT: pmuludq %xmm1, %xmm2
|
||||
; X86-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
|
||||
; X86-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test5:
|
||||
|
@ -29,9 +29,8 @@ define void @test1(<2 x double>* %r, <2 x double>* %A, double %B) nounwind {
|
||||
;
|
||||
; X64-SSE-LABEL: test1:
|
||||
; X64-SSE: # %bb.0:
|
||||
; X64-SSE-NEXT: movapd (%rsi), %xmm1
|
||||
; X64-SSE-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
|
||||
; X64-SSE-NEXT: movapd %xmm1, (%rdi)
|
||||
; X64-SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],mem[1]
|
||||
; X64-SSE-NEXT: movapd %xmm0, (%rdi)
|
||||
; X64-SSE-NEXT: retq
|
||||
;
|
||||
; X64-AVX-LABEL: test1:
|
||||
|
@ -149,14 +149,12 @@ entry:
|
||||
define <2 x double> @vsel_double(<2 x double> %v1, <2 x double> %v2) {
|
||||
; SSE2-LABEL: vsel_double:
|
||||
; SSE2: # %bb.0: # %entry
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
|
||||
; SSE2-NEXT: movapd %xmm1, %xmm0
|
||||
; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: vsel_double:
|
||||
; SSSE3: # %bb.0: # %entry
|
||||
; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
|
||||
; SSSE3-NEXT: movapd %xmm1, %xmm0
|
||||
; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: vsel_double:
|
||||
@ -176,14 +174,12 @@ entry:
|
||||
define <2 x i64> @vsel_i64(<2 x i64> %v1, <2 x i64> %v2) {
|
||||
; SSE2-LABEL: vsel_i64:
|
||||
; SSE2: # %bb.0: # %entry
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
|
||||
; SSE2-NEXT: movapd %xmm1, %xmm0
|
||||
; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: vsel_i64:
|
||||
; SSSE3: # %bb.0: # %entry
|
||||
; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
|
||||
; SSSE3-NEXT: movapd %xmm1, %xmm0
|
||||
; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: vsel_i64:
|
||||
@ -340,20 +336,16 @@ define <8 x double> @vsel_double8(<8 x double> %v1, <8 x double> %v2) {
|
||||
; SSE2: # %bb.0: # %entry
|
||||
; SSE2-NEXT: movaps %xmm7, %xmm3
|
||||
; SSE2-NEXT: movaps %xmm5, %xmm1
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm4 = xmm0[0],xmm4[1]
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm6 = xmm2[0],xmm6[1]
|
||||
; SSE2-NEXT: movapd %xmm4, %xmm0
|
||||
; SSE2-NEXT: movapd %xmm6, %xmm2
|
||||
; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm4[1]
|
||||
; SSE2-NEXT: shufpd {{.*#+}} xmm2 = xmm2[0],xmm6[1]
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: vsel_double8:
|
||||
; SSSE3: # %bb.0: # %entry
|
||||
; SSSE3-NEXT: movaps %xmm7, %xmm3
|
||||
; SSSE3-NEXT: movaps %xmm5, %xmm1
|
||||
; SSSE3-NEXT: movsd {{.*#+}} xmm4 = xmm0[0],xmm4[1]
|
||||
; SSSE3-NEXT: movsd {{.*#+}} xmm6 = xmm2[0],xmm6[1]
|
||||
; SSSE3-NEXT: movapd %xmm4, %xmm0
|
||||
; SSSE3-NEXT: movapd %xmm6, %xmm2
|
||||
; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm4[1]
|
||||
; SSSE3-NEXT: shufpd {{.*#+}} xmm2 = xmm2[0],xmm6[1]
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: vsel_double8:
|
||||
@ -379,20 +371,16 @@ define <8 x i64> @vsel_i648(<8 x i64> %v1, <8 x i64> %v2) {
|
||||
; SSE2: # %bb.0: # %entry
|
||||
; SSE2-NEXT: movaps %xmm7, %xmm3
|
||||
; SSE2-NEXT: movaps %xmm5, %xmm1
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm4 = xmm0[0],xmm4[1]
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm6 = xmm2[0],xmm6[1]
|
||||
; SSE2-NEXT: movapd %xmm4, %xmm0
|
||||
; SSE2-NEXT: movapd %xmm6, %xmm2
|
||||
; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm4[1]
|
||||
; SSE2-NEXT: shufpd {{.*#+}} xmm2 = xmm2[0],xmm6[1]
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: vsel_i648:
|
||||
; SSSE3: # %bb.0: # %entry
|
||||
; SSSE3-NEXT: movaps %xmm7, %xmm3
|
||||
; SSSE3-NEXT: movaps %xmm5, %xmm1
|
||||
; SSSE3-NEXT: movsd {{.*#+}} xmm4 = xmm0[0],xmm4[1]
|
||||
; SSSE3-NEXT: movsd {{.*#+}} xmm6 = xmm2[0],xmm6[1]
|
||||
; SSSE3-NEXT: movapd %xmm4, %xmm0
|
||||
; SSSE3-NEXT: movapd %xmm6, %xmm2
|
||||
; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm4[1]
|
||||
; SSSE3-NEXT: shufpd {{.*#+}} xmm2 = xmm2[0],xmm6[1]
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: vsel_i648:
|
||||
@ -416,18 +404,14 @@ entry:
|
||||
define <4 x double> @vsel_double4(<4 x double> %v1, <4 x double> %v2) {
|
||||
; SSE2-LABEL: vsel_double4:
|
||||
; SSE2: # %bb.0: # %entry
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm3 = xmm1[0],xmm3[1]
|
||||
; SSE2-NEXT: movapd %xmm2, %xmm0
|
||||
; SSE2-NEXT: movapd %xmm3, %xmm1
|
||||
; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm2[1]
|
||||
; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm3[1]
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: vsel_double4:
|
||||
; SSSE3: # %bb.0: # %entry
|
||||
; SSSE3-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
|
||||
; SSSE3-NEXT: movsd {{.*#+}} xmm3 = xmm1[0],xmm3[1]
|
||||
; SSSE3-NEXT: movapd %xmm2, %xmm0
|
||||
; SSSE3-NEXT: movapd %xmm3, %xmm1
|
||||
; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm2[1]
|
||||
; SSSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm3[1]
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: vsel_double4:
|
||||
@ -529,15 +513,13 @@ define <4 x double> @constant_blendvpd_avx(<4 x double> %xy, <4 x double> %ab) {
|
||||
; SSE2-LABEL: constant_blendvpd_avx:
|
||||
; SSE2: # %bb.0: # %entry
|
||||
; SSE2-NEXT: movaps %xmm2, %xmm0
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm3 = xmm1[0],xmm3[1]
|
||||
; SSE2-NEXT: movapd %xmm3, %xmm1
|
||||
; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm3[1]
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: constant_blendvpd_avx:
|
||||
; SSSE3: # %bb.0: # %entry
|
||||
; SSSE3-NEXT: movaps %xmm2, %xmm0
|
||||
; SSSE3-NEXT: movsd {{.*#+}} xmm3 = xmm1[0],xmm3[1]
|
||||
; SSSE3-NEXT: movapd %xmm3, %xmm1
|
||||
; SSSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm3[1]
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: constant_blendvpd_avx:
|
||||
@ -713,14 +695,12 @@ entry:
|
||||
define <4 x double> @blend_shufflevector_4xdouble(<4 x double> %a, <4 x double> %b) {
|
||||
; SSE2-LABEL: blend_shufflevector_4xdouble:
|
||||
; SSE2: # %bb.0: # %entry
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
|
||||
; SSE2-NEXT: movapd %xmm2, %xmm0
|
||||
; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm2[1]
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: blend_shufflevector_4xdouble:
|
||||
; SSSE3: # %bb.0: # %entry
|
||||
; SSSE3-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
|
||||
; SSSE3-NEXT: movapd %xmm2, %xmm0
|
||||
; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm2[1]
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: blend_shufflevector_4xdouble:
|
||||
|
@ -145,20 +145,21 @@ define <2 x i32> @var_shift_v2i32(<2 x i32> %a, <2 x i32> %b) nounwind {
|
||||
; X32-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
|
||||
; X32-SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
|
||||
; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm1
|
||||
; X32-SSE-NEXT: movdqa {{.*#+}} xmm3 = [0,2147483648,0,2147483648]
|
||||
; X32-SSE-NEXT: movdqa %xmm3, %xmm0
|
||||
; X32-SSE-NEXT: psrlq %xmm1, %xmm0
|
||||
; X32-SSE-NEXT: movdqa {{.*#+}} xmm0 = [0,2147483648,0,2147483648]
|
||||
; X32-SSE-NEXT: movdqa %xmm0, %xmm3
|
||||
; X32-SSE-NEXT: psrlq %xmm1, %xmm3
|
||||
; X32-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm1[2,3,0,1]
|
||||
; X32-SSE-NEXT: xorps %xmm5, %xmm5
|
||||
; X32-SSE-NEXT: movss {{.*#+}} xmm5 = xmm4[0],xmm5[1,2,3]
|
||||
; X32-SSE-NEXT: psrlq %xmm5, %xmm3
|
||||
; X32-SSE-NEXT: movsd {{.*#+}} xmm3 = xmm0[0],xmm3[1]
|
||||
; X32-SSE-NEXT: movdqa %xmm2, %xmm0
|
||||
; X32-SSE-NEXT: psrlq %xmm5, %xmm0
|
||||
; X32-SSE-NEXT: movsd {{.*#+}} xmm0 = xmm3[0],xmm0[1]
|
||||
; X32-SSE-NEXT: movdqa %xmm2, %xmm3
|
||||
; X32-SSE-NEXT: psrlq %xmm5, %xmm3
|
||||
; X32-SSE-NEXT: psrlq %xmm1, %xmm2
|
||||
; X32-SSE-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
|
||||
; X32-SSE-NEXT: xorpd %xmm3, %xmm0
|
||||
; X32-SSE-NEXT: psubq %xmm3, %xmm0
|
||||
; X32-SSE-NEXT: shufpd {{.*#+}} xmm2 = xmm2[0],xmm3[1]
|
||||
; X32-SSE-NEXT: xorpd %xmm0, %xmm2
|
||||
; X32-SSE-NEXT: psubq %xmm0, %xmm2
|
||||
; X32-SSE-NEXT: movdqa %xmm2, %xmm0
|
||||
; X32-SSE-NEXT: retl
|
||||
%shift = ashr <2 x i32> %a, %b
|
||||
ret <2 x i32> %shift
|
||||
@ -1057,21 +1058,22 @@ define <2 x i32> @splatvar_shift_v2i32(<2 x i32> %a, <2 x i32> %b) nounwind {
|
||||
; X32-SSE-NEXT: psrad $31, %xmm0
|
||||
; X32-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
|
||||
; X32-SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
|
||||
; X32-SSE-NEXT: movdqa {{.*#+}} xmm3 = [4294967295,0,4294967295,0]
|
||||
; X32-SSE-NEXT: pand %xmm1, %xmm3
|
||||
; X32-SSE-NEXT: movdqa {{.*#+}} xmm4 = [0,2147483648,0,2147483648]
|
||||
; X32-SSE-NEXT: movdqa %xmm4, %xmm0
|
||||
; X32-SSE-NEXT: psrlq %xmm3, %xmm0
|
||||
; X32-SSE-NEXT: movdqa {{.*#+}} xmm0 = [4294967295,0,4294967295,0]
|
||||
; X32-SSE-NEXT: pand %xmm1, %xmm0
|
||||
; X32-SSE-NEXT: movdqa {{.*#+}} xmm3 = [0,2147483648,0,2147483648]
|
||||
; X32-SSE-NEXT: movdqa %xmm3, %xmm4
|
||||
; X32-SSE-NEXT: psrlq %xmm0, %xmm4
|
||||
; X32-SSE-NEXT: xorps %xmm5, %xmm5
|
||||
; X32-SSE-NEXT: movss {{.*#+}} xmm5 = xmm1[0],xmm5[1,2,3]
|
||||
; X32-SSE-NEXT: psrlq %xmm5, %xmm4
|
||||
; X32-SSE-NEXT: movsd {{.*#+}} xmm4 = xmm0[0],xmm4[1]
|
||||
; X32-SSE-NEXT: psrlq %xmm5, %xmm3
|
||||
; X32-SSE-NEXT: movsd {{.*#+}} xmm3 = xmm4[0],xmm3[1]
|
||||
; X32-SSE-NEXT: movdqa %xmm2, %xmm1
|
||||
; X32-SSE-NEXT: psrlq %xmm5, %xmm1
|
||||
; X32-SSE-NEXT: psrlq %xmm0, %xmm2
|
||||
; X32-SSE-NEXT: shufpd {{.*#+}} xmm2 = xmm2[0],xmm1[1]
|
||||
; X32-SSE-NEXT: xorpd %xmm3, %xmm2
|
||||
; X32-SSE-NEXT: psubq %xmm3, %xmm2
|
||||
; X32-SSE-NEXT: movdqa %xmm2, %xmm0
|
||||
; X32-SSE-NEXT: psrlq %xmm5, %xmm0
|
||||
; X32-SSE-NEXT: psrlq %xmm3, %xmm2
|
||||
; X32-SSE-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
|
||||
; X32-SSE-NEXT: xorpd %xmm4, %xmm0
|
||||
; X32-SSE-NEXT: psubq %xmm4, %xmm0
|
||||
; X32-SSE-NEXT: retl
|
||||
%splat = shufflevector <2 x i32> %b, <2 x i32> undef, <2 x i32> zeroinitializer
|
||||
%shift = ashr <2 x i32> %a, %splat
|
||||
|
@ -222,20 +222,17 @@ define <2 x double> @shuffle_v2f64_33(<2 x double> %a, <2 x double> %b) {
|
||||
define <2 x double> @shuffle_v2f64_03(<2 x double> %a, <2 x double> %b) {
|
||||
; SSE2-LABEL: shuffle_v2f64_03:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
|
||||
; SSE2-NEXT: movapd %xmm1, %xmm0
|
||||
; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE3-LABEL: shuffle_v2f64_03:
|
||||
; SSE3: # %bb.0:
|
||||
; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
|
||||
; SSE3-NEXT: movapd %xmm1, %xmm0
|
||||
; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
|
||||
; SSE3-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: shuffle_v2f64_03:
|
||||
; SSSE3: # %bb.0:
|
||||
; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
|
||||
; SSSE3-NEXT: movapd %xmm1, %xmm0
|
||||
; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: shuffle_v2f64_03:
|
||||
@ -351,20 +348,17 @@ define <2 x i64> @shuffle_v2i64_02_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64
|
||||
define <2 x i64> @shuffle_v2i64_03(<2 x i64> %a, <2 x i64> %b) {
|
||||
; SSE2-LABEL: shuffle_v2i64_03:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
|
||||
; SSE2-NEXT: movapd %xmm1, %xmm0
|
||||
; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE3-LABEL: shuffle_v2i64_03:
|
||||
; SSE3: # %bb.0:
|
||||
; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
|
||||
; SSE3-NEXT: movapd %xmm1, %xmm0
|
||||
; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
|
||||
; SSE3-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: shuffle_v2i64_03:
|
||||
; SSSE3: # %bb.0:
|
||||
; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
|
||||
; SSSE3-NEXT: movapd %xmm1, %xmm0
|
||||
; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: shuffle_v2i64_03:
|
||||
@ -382,20 +376,20 @@ define <2 x i64> @shuffle_v2i64_03(<2 x i64> %a, <2 x i64> %b) {
|
||||
define <2 x i64> @shuffle_v2i64_03_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
|
||||
; SSE2-LABEL: shuffle_v2i64_03_copy:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: movapd %xmm2, %xmm0
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
|
||||
; SSE2-NEXT: movapd %xmm1, %xmm0
|
||||
; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm2[1]
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE3-LABEL: shuffle_v2i64_03_copy:
|
||||
; SSE3: # %bb.0:
|
||||
; SSE3-NEXT: movapd %xmm2, %xmm0
|
||||
; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
|
||||
; SSE3-NEXT: movapd %xmm1, %xmm0
|
||||
; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm2[1]
|
||||
; SSE3-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: shuffle_v2i64_03_copy:
|
||||
; SSSE3: # %bb.0:
|
||||
; SSSE3-NEXT: movapd %xmm2, %xmm0
|
||||
; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
|
||||
; SSSE3-NEXT: movapd %xmm1, %xmm0
|
||||
; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm2[1]
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: shuffle_v2i64_03_copy:
|
||||
@ -1085,20 +1079,17 @@ define <2 x i64> @insert_mem_hi_v2i64(i64* %ptr, <2 x i64> %b) {
|
||||
define <2 x double> @insert_reg_lo_v2f64(double %a, <2 x double> %b) {
|
||||
; SSE2-LABEL: insert_reg_lo_v2f64:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
|
||||
; SSE2-NEXT: movapd %xmm1, %xmm0
|
||||
; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE3-LABEL: insert_reg_lo_v2f64:
|
||||
; SSE3: # %bb.0:
|
||||
; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
|
||||
; SSE3-NEXT: movapd %xmm1, %xmm0
|
||||
; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
|
||||
; SSE3-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: insert_reg_lo_v2f64:
|
||||
; SSSE3: # %bb.0:
|
||||
; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
|
||||
; SSSE3-NEXT: movapd %xmm1, %xmm0
|
||||
; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: insert_reg_lo_v2f64:
|
||||
|
@ -2255,20 +2255,17 @@ define <4 x i32> @insert_mem_hi_v4i32(<2 x i32>* %ptr, <4 x i32> %b) {
|
||||
define <4 x float> @insert_reg_lo_v4f32(double %a, <4 x float> %b) {
|
||||
; SSE2-LABEL: insert_reg_lo_v4f32:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
|
||||
; SSE2-NEXT: movapd %xmm1, %xmm0
|
||||
; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE3-LABEL: insert_reg_lo_v4f32:
|
||||
; SSE3: # %bb.0:
|
||||
; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
|
||||
; SSE3-NEXT: movapd %xmm1, %xmm0
|
||||
; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
|
||||
; SSE3-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: insert_reg_lo_v4f32:
|
||||
; SSSE3: # %bb.0:
|
||||
; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
|
||||
; SSSE3-NEXT: movapd %xmm1, %xmm0
|
||||
; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: insert_reg_lo_v4f32:
|
||||
|
@ -1260,9 +1260,9 @@ define <8 x i16> @shuffle_v8i16_443aXXXX(<8 x i16> %a, <8 x i16> %b) {
|
||||
define <8 x i16> @shuffle_v8i16_032dXXXX(<8 x i16> %a, <8 x i16> %b) {
|
||||
; SSE2-LABEL: shuffle_v8i16_032dXXXX:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
|
||||
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,0]
|
||||
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm1[0,1,2,3,6,5,6,7]
|
||||
; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
|
||||
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,0]
|
||||
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,6,7]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
|
||||
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7]
|
||||
; SSE2-NEXT: retq
|
||||
@ -1459,9 +1459,9 @@ define <8 x i16> @shuffle_v8i16_cde3XXXX(<8 x i16> %a, <8 x i16> %b) {
|
||||
define <8 x i16> @shuffle_v8i16_012dcde3(<8 x i16> %a, <8 x i16> %b) {
|
||||
; SSE2-LABEL: shuffle_v8i16_012dcde3:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
|
||||
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3,2,1]
|
||||
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[3,1,2,0,4,5,6,7]
|
||||
; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
|
||||
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,3,2,1]
|
||||
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
|
||||
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7]
|
||||
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,5,7]
|
||||
|
@ -43,8 +43,7 @@ define <16 x i8> @combine_vpshufb_as_movq(<16 x i8> %a0) {
|
||||
define <2 x double> @combine_pshufb_as_movsd(<2 x double> %a0, <2 x double> %a1) {
|
||||
; SSSE3-LABEL: combine_pshufb_as_movsd:
|
||||
; SSSE3: # %bb.0:
|
||||
; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
|
||||
; SSSE3-NEXT: movapd %xmm1, %xmm0
|
||||
; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: combine_pshufb_as_movsd:
|
||||
@ -669,8 +668,7 @@ declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind rea
|
||||
define <16 x i8> @combine_pshufb_pshufb_or_as_blend(<16 x i8> %a0, <16 x i8> %a1) {
|
||||
; SSSE3-LABEL: combine_pshufb_pshufb_or_as_blend:
|
||||
; SSSE3: # %bb.0:
|
||||
; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
|
||||
; SSSE3-NEXT: movapd %xmm1, %xmm0
|
||||
; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: combine_pshufb_pshufb_or_as_blend:
|
||||
|
@ -2132,14 +2132,12 @@ define <4 x float> @combine_undef_input_test4(<4 x float> %a, <4 x float> %b) {
|
||||
define <4 x float> @combine_undef_input_test5(<4 x float> %a, <4 x float> %b) {
|
||||
; SSE2-LABEL: combine_undef_input_test5:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
|
||||
; SSE2-NEXT: movapd %xmm1, %xmm0
|
||||
; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: combine_undef_input_test5:
|
||||
; SSSE3: # %bb.0:
|
||||
; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
|
||||
; SSSE3-NEXT: movapd %xmm1, %xmm0
|
||||
; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: combine_undef_input_test5:
|
||||
@ -2316,14 +2314,12 @@ define <4 x float> @combine_undef_input_test14(<4 x float> %a, <4 x float> %b) {
|
||||
define <4 x float> @combine_undef_input_test15(<4 x float> %a, <4 x float> %b) {
|
||||
; SSE2-LABEL: combine_undef_input_test15:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
|
||||
; SSE2-NEXT: movapd %xmm1, %xmm0
|
||||
; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: combine_undef_input_test15:
|
||||
; SSSE3: # %bb.0:
|
||||
; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
|
||||
; SSSE3-NEXT: movapd %xmm1, %xmm0
|
||||
; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: combine_undef_input_test15:
|
||||
|
@ -7,8 +7,7 @@
|
||||
define <4 x i32> @test1(<4 x i32> %A, <4 x i32> %B) {
|
||||
; SSE2-LABEL: test1:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
|
||||
; SSE2-NEXT: movapd %xmm1, %xmm0
|
||||
; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: test1:
|
||||
@ -46,8 +45,7 @@ define <4 x i32> @test2(<4 x i32> %A, <4 x i32> %B) {
|
||||
define <4 x float> @test3(<4 x float> %A, <4 x float> %B) {
|
||||
; SSE2-LABEL: test3:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
|
||||
; SSE2-NEXT: movapd %xmm1, %xmm0
|
||||
; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: test3:
|
||||
|
@ -30,8 +30,7 @@ define <4 x float> @test1(<4 x float> %a, <4 x float> %b) {
|
||||
define <4 x float> @test2(<4 x float> %a, <4 x float> %b) {
|
||||
; SSE2-LABEL: test2:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
|
||||
; SSE2-NEXT: movapd %xmm1, %xmm0
|
||||
; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: test2:
|
||||
@ -107,8 +106,7 @@ define <8 x i16> @test6(<8 x i16> %a, <8 x i16> %b) {
|
||||
define <8 x i16> @test7(<8 x i16> %a, <8 x i16> %b) {
|
||||
; SSE2-LABEL: test7:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
|
||||
; SSE2-NEXT: movapd %xmm1, %xmm0
|
||||
; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: test7:
|
||||
@ -392,8 +390,7 @@ define <4 x i32> @test23(<4 x i32> %a, <4 x i32> %b) {
|
||||
define <2 x double> @test24(<2 x double> %a, <2 x double> %b) {
|
||||
; SSE2-LABEL: test24:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
|
||||
; SSE2-NEXT: movapd %xmm1, %xmm0
|
||||
; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: test24:
|
||||
@ -412,8 +409,7 @@ define <2 x double> @test24(<2 x double> %a, <2 x double> %b) {
|
||||
define <2 x i64> @test25(<2 x i64> %a, <2 x i64> %b) {
|
||||
; SSE2-LABEL: test25:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
|
||||
; SSE2-NEXT: movapd %xmm1, %xmm0
|
||||
; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: test25:
|
||||
|
@ -223,9 +223,9 @@ define <2 x i64> @shr2_nosplat(<2 x i64> %A) nounwind {
|
||||
; X32-NEXT: psrlq $8, %xmm2
|
||||
; X32-NEXT: movdqa %xmm0, %xmm1
|
||||
; X32-NEXT: psrlq $1, %xmm1
|
||||
; X32-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
|
||||
; X32-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
|
||||
; X32-NEXT: xorpd %xmm0, %xmm1
|
||||
; X32-NEXT: shufpd {{.*#+}} xmm2 = xmm2[0],xmm1[1]
|
||||
; X32-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
|
||||
; X32-NEXT: xorpd %xmm2, %xmm1
|
||||
; X32-NEXT: movapd %xmm1, %xmm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
@ -235,9 +235,9 @@ define <2 x i64> @shr2_nosplat(<2 x i64> %A) nounwind {
|
||||
; X64-NEXT: psrlq $8, %xmm2
|
||||
; X64-NEXT: movdqa %xmm0, %xmm1
|
||||
; X64-NEXT: psrlq $1, %xmm1
|
||||
; X64-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
|
||||
; X64-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
|
||||
; X64-NEXT: xorpd %xmm0, %xmm1
|
||||
; X64-NEXT: shufpd {{.*#+}} xmm2 = xmm2[0],xmm1[1]
|
||||
; X64-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
|
||||
; X64-NEXT: xorpd %xmm2, %xmm1
|
||||
; X64-NEXT: movapd %xmm1, %xmm0
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
|
Loading…
Reference in New Issue
Block a user