
[DAG] SimplifyDemandedBits - peek through SHL if we only demand sign bits.

If we're only demanding the (shifted) sign bits of the shift source value, then we can use the value directly.

This handles SimplifyDemandedBits/SimplifyMultipleUseDemandedBits for both ISD::SHL and X86ISD::VSHLI.

Differential Revision: https://reviews.llvm.org/D80869
Simon Pilgrim 2020-06-03 15:56:28 +01:00
parent 9cf21dc31e
commit 8ca5ce94d4
10 changed files with 172 additions and 133 deletions
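The transform rests on a short counting argument: if the shift source has NumSignBits known sign bits, then after shifting left by ShAmt it still has NumSignBits - ShAmt of them, and once that covers every demanded (upper) bit, the demanded bits of the shifted and unshifted values are identical, so the source can be returned directly. Below is a minimal standalone sketch of that condition, exhaustively checked on 8-bit values; it is plain C++, not LLVM code, and numSignBits/countTrailingZeros are hand-rolled stand-ins for SelectionDAG::ComputeNumSignBits and APInt::countTrailingZeros.

// Standalone sketch (not LLVM code) of the demanded-sign-bits condition.
#include <cassert>
#include <cstdint>

// Number of high bits of X that are copies of the sign bit (including it).
static unsigned numSignBits(int8_t X) {
  unsigned N = 1;
  while (N < 8 && (((X >> (7 - N)) & 1) == ((X >> 7) & 1)))
    ++N;
  return N;
}

// Trailing zero count of an 8-bit mask (8 if the mask is zero).
static unsigned countTrailingZeros(uint8_t M) {
  unsigned N = 0;
  while (N < 8 && ((M >> N) & 1) == 0)
    ++N;
  return N;
}

int main() {
  const unsigned BitWidth = 8;
  for (int V = -128; V <= 127; ++V) {
    int8_t X = static_cast<int8_t>(V);
    for (unsigned ShAmt = 0; ShAmt < BitWidth; ++ShAmt) {
      for (unsigned Mask = 1; Mask <= 255; ++Mask) {
        uint8_t DemandedBits = static_cast<uint8_t>(Mask);
        unsigned NumSignBits = numSignBits(X);
        unsigned UpperDemandedBits = BitWidth - countTrailingZeros(DemandedBits);
        // The new condition: every demanded bit of (X << ShAmt) is a copy of
        // X's sign bit, and so is the corresponding bit of X itself, so the
        // shift source can stand in for the shift on the demanded bits.
        if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= UpperDemandedBits) {
          uint8_t Shifted =
              static_cast<uint8_t>(static_cast<uint8_t>(X) << ShAmt);
          assert((Shifted & DemandedBits) ==
                 (static_cast<uint8_t>(X) & DemandedBits));
        }
      }
    }
  }
  return 0;
}

Running the sketch completes without tripping the assert, which is the guarantee SimplifyDemandedBits and SimplifyMultipleUseDemandedBits rely on when they return the shift source unchanged.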


@@ -713,6 +713,22 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
      return Op.getOperand(1);
    break;
  }
  case ISD::SHL: {
    // If we are only demanding sign bits then we can use the shift source
    // directly.
    if (const APInt *MaxSA =
            DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
      SDValue Op0 = Op.getOperand(0);
      unsigned ShAmt = MaxSA->getZExtValue();
      unsigned BitWidth = DemandedBits.getBitWidth();
      unsigned NumSignBits =
          DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
      unsigned UpperDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
      if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
        return Op0;
    }
    break;
  }
  case ISD::SETCC: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
@@ -1441,6 +1457,18 @@ bool TargetLowering::SimplifyDemandedBits(
          ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
        return true;
    }

    // If we are only demanding sign bits then we can use the shift source
    // directly.
    if (const APInt *MaxSA =
            TLO.DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
      unsigned ShAmt = MaxSA->getZExtValue();
      unsigned NumSignBits =
          TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
      unsigned UpperDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
      if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
        return TLO.CombineTo(Op, Op0);
    }
    break;
  }
  case ISD::SRL: {


@@ -37152,6 +37152,14 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
      }
    }

    // If we are only demanding sign bits then we can use the shift source directly.
    unsigned NumSignBits =
        TLO.DAG.ComputeNumSignBits(Op0, OriginalDemandedElts, Depth + 1);
    unsigned UpperDemandedBits =
        BitWidth - OriginalDemandedBits.countTrailingZeros();
    if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= UpperDemandedBits)
      return TLO.CombineTo(Op, Op0);

    if (SimplifyDemandedBits(Op0, DemandedMask, OriginalDemandedElts, Known,
                             TLO, Depth + 1))
      return true;
@@ -37429,7 +37437,19 @@ SDValue X86TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
    if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
        !DemandedElts[CIdx->getZExtValue()])
      return Vec;
    break;
  }
  case X86ISD::VSHLI: {
    // If we are only demanding sign bits then we can use the shift source
    // directly.
    SDValue Op0 = Op.getOperand(0);
    unsigned ShAmt = Op.getConstantOperandVal(1);
    unsigned BitWidth = DemandedBits.getBitWidth();
    unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
    unsigned UpperDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
    if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= UpperDemandedBits)
      return Op0;
    break;
  }
  case X86ISD::VSRAI:
    // iff we only need the sign bit then we can use the source directly.


@@ -1921,17 +1921,16 @@ define amdgpu_kernel void @v_sdiv_i24(i32 addrspace(1)* %out, i24 addrspace(1)*
; GCN-NEXT: buffer_load_sbyte v1, off, s[4:7], 0 offset:2
; GCN-NEXT: buffer_load_ushort v2, off, s[4:7], 0 offset:4
; GCN-NEXT: buffer_load_sbyte v3, off, s[4:7], 0 offset:6
; GCN-NEXT: s_waitcnt vmcnt(2)
; GCN-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GCN-NEXT: v_or_b32_e32 v0, v0, v1
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GCN-NEXT: v_or_b32_e32 v2, v2, v3
; GCN-NEXT: v_lshlrev_b32_e32 v4, 16, v3
; GCN-NEXT: v_or_b32_e32 v2, v2, v4
; GCN-NEXT: v_cvt_f32_i32_e32 v2, v2
; GCN-NEXT: v_lshlrev_b32_e32 v4, 16, v1
; GCN-NEXT: v_or_b32_e32 v0, v0, v4
; GCN-NEXT: v_cvt_f32_i32_e32 v0, v0
; GCN-NEXT: v_rcp_iflag_f32_e32 v4, v2
; GCN-NEXT: v_xor_b32_e32 v1, v1, v3
; GCN-NEXT: v_ashrrev_i32_e32 v1, 30, v1
; GCN-NEXT: v_rcp_iflag_f32_e32 v4, v2
; GCN-NEXT: v_or_b32_e32 v1, 1, v1
; GCN-NEXT: v_mul_f32_e32 v3, v0, v4
; GCN-NEXT: v_trunc_f32_e32 v3, v3
@@ -1960,17 +1959,16 @@ define amdgpu_kernel void @v_sdiv_i24(i32 addrspace(1)* %out, i24 addrspace(1)*
; TONGA-NEXT: buffer_load_sbyte v1, off, s[4:7], 0 offset:2
; TONGA-NEXT: buffer_load_ushort v2, off, s[4:7], 0 offset:4
; TONGA-NEXT: buffer_load_sbyte v3, off, s[4:7], 0 offset:6
; TONGA-NEXT: s_waitcnt vmcnt(2)
; TONGA-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; TONGA-NEXT: v_or_b32_e32 v0, v0, v1
; TONGA-NEXT: s_waitcnt vmcnt(0)
; TONGA-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; TONGA-NEXT: v_or_b32_e32 v2, v2, v3
; TONGA-NEXT: v_lshlrev_b32_e32 v4, 16, v3
; TONGA-NEXT: v_or_b32_e32 v2, v2, v4
; TONGA-NEXT: v_cvt_f32_i32_e32 v2, v2
; TONGA-NEXT: v_lshlrev_b32_e32 v4, 16, v1
; TONGA-NEXT: v_or_b32_e32 v0, v0, v4
; TONGA-NEXT: v_cvt_f32_i32_e32 v0, v0
; TONGA-NEXT: v_rcp_iflag_f32_e32 v4, v2
; TONGA-NEXT: v_xor_b32_e32 v1, v1, v3
; TONGA-NEXT: v_ashrrev_i32_e32 v1, 30, v1
; TONGA-NEXT: v_rcp_iflag_f32_e32 v4, v2
; TONGA-NEXT: v_or_b32_e32 v1, 1, v1
; TONGA-NEXT: v_mul_f32_e32 v3, v0, v4
; TONGA-NEXT: v_trunc_f32_e32 v3, v3
@@ -1999,17 +1997,16 @@ define amdgpu_kernel void @v_sdiv_i24(i32 addrspace(1)* %out, i24 addrspace(1)*
; GFX9-NEXT: buffer_load_sbyte v1, off, s[4:7], 0 offset:2
; GFX9-NEXT: buffer_load_ushort v2, off, s[4:7], 0 offset:4
; GFX9-NEXT: buffer_load_sbyte v3, off, s[4:7], 0 offset:6
; GFX9-NEXT: s_waitcnt vmcnt(2)
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX9-NEXT: v_or_b32_e32 v0, v0, v1
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX9-NEXT: v_or_b32_e32 v2, v2, v3
; GFX9-NEXT: v_lshlrev_b32_e32 v4, 16, v3
; GFX9-NEXT: v_or_b32_e32 v2, v2, v4
; GFX9-NEXT: v_cvt_f32_i32_e32 v2, v2
; GFX9-NEXT: v_lshlrev_b32_e32 v4, 16, v1
; GFX9-NEXT: v_or_b32_e32 v0, v0, v4
; GFX9-NEXT: v_cvt_f32_i32_e32 v0, v0
; GFX9-NEXT: v_rcp_iflag_f32_e32 v4, v2
; GFX9-NEXT: v_xor_b32_e32 v1, v1, v3
; GFX9-NEXT: v_ashrrev_i32_e32 v1, 30, v1
; GFX9-NEXT: v_rcp_iflag_f32_e32 v4, v2
; GFX9-NEXT: v_or_b32_e32 v1, 1, v1
; GFX9-NEXT: v_mul_f32_e32 v3, v0, v4
; GFX9-NEXT: v_trunc_f32_e32 v3, v3


@@ -510,7 +510,6 @@ define i64 @v16i8_widened_with_ones(<16 x i8> %a, <16 x i8> %b) {
; AVX1-LABEL: v16i8_widened_with_ones:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %ecx
; AVX1-NEXT: orl $-65536, %ecx # imm = 0xFFFF0000
; AVX1-NEXT: movabsq $-4294967296, %rax # imm = 0xFFFFFFFF00000000


@@ -922,6 +922,7 @@ define <4 x i32> @fshr_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) nounw
define void @PR45265(i32 %0, %struct.S* nocapture readonly %1) nounwind {
; X32-SSE2-LABEL: PR45265:
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: pushl %ebx
; X32-SSE2-NEXT: pushl %edi
; X32-SSE2-NEXT: pushl %esi
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -929,24 +930,27 @@ define void @PR45265(i32 %0, %struct.S* nocapture readonly %1) nounwind {
; X32-SSE2-NEXT: leal (%eax,%eax,2), %edx
; X32-SSE2-NEXT: movzwl 8(%ecx,%edx,4), %esi
; X32-SSE2-NEXT: movsbl 10(%ecx,%edx,4), %edi
; X32-SSE2-NEXT: shll $16, %edi
; X32-SSE2-NEXT: orl %edi, %esi
; X32-SSE2-NEXT: movl %edi, %ebx
; X32-SSE2-NEXT: shll $16, %ebx
; X32-SSE2-NEXT: orl %esi, %ebx
; X32-SSE2-NEXT: movl 4(%ecx,%edx,4), %ecx
; X32-SSE2-NEXT: shrdl $8, %esi, %ecx
; X32-SSE2-NEXT: shrdl $8, %ebx, %ecx
; X32-SSE2-NEXT: xorl %eax, %ecx
; X32-SSE2-NEXT: sarl $31, %eax
; X32-SSE2-NEXT: sarl $31, %edi
; X32-SSE2-NEXT: shldl $24, %esi, %edi
; X32-SSE2-NEXT: shldl $24, %ebx, %edi
; X32-SSE2-NEXT: xorl %eax, %edi
; X32-SSE2-NEXT: orl %edi, %ecx
; X32-SSE2-NEXT: jne .LBB44_1
; X32-SSE2-NEXT: # %bb.2:
; X32-SSE2-NEXT: popl %esi
; X32-SSE2-NEXT: popl %edi
; X32-SSE2-NEXT: popl %ebx
; X32-SSE2-NEXT: jmp _Z3foov # TAILCALL
; X32-SSE2-NEXT: .LBB44_1:
; X32-SSE2-NEXT: popl %esi
; X32-SSE2-NEXT: popl %edi
; X32-SSE2-NEXT: popl %ebx
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: PR45265:


@@ -74,8 +74,6 @@ define <4 x i64> @PR45808(<4 x i64> %0, <4 x i64> %1) {
; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm3
; AVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vpsllq $63, %xmm3, %xmm3
; AVX1-NEXT: vpsllq $63, %xmm2, %xmm2
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: retq


@@ -597,114 +597,112 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; X64-NEXT: paddq %xmm0, %xmm0
; X64-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; X64-NEXT: movq %xmm0, %rbx
; X64-NEXT: movq %rbx, %r15
; X64-NEXT: sarq $63, %r15
; X64-NEXT: movq %r15, %r12
; X64-NEXT: shldq $31, %rbx, %r12
; X64-NEXT: movq %rbx, %rbp
; X64-NEXT: sarq $63, %rbp
; X64-NEXT: shldq $31, %rbx, %rbp
; X64-NEXT: pxor %xmm0, %xmm0
; X64-NEXT: pcmpgtd %xmm1, %xmm0
; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X64-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; X64-NEXT: movq %xmm1, %rdx
; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq %rdx, %r14
; X64-NEXT: sarq $63, %r14
; X64-NEXT: shlq $31, %rbx
; X64-NEXT: movq %rbx, %rdi
; X64-NEXT: movq %r12, %rsi
; X64-NEXT: movq %r14, %rcx
; X64-NEXT: movq %rdx, %r15
; X64-NEXT: sarq $63, %r15
; X64-NEXT: movq %rbx, %r12
; X64-NEXT: shlq $31, %r12
; X64-NEXT: movq %r12, %rdi
; X64-NEXT: movq %rbp, %rsi
; X64-NEXT: movq %r15, %rcx
; X64-NEXT: callq __divti3
; X64-NEXT: movq %rax, %r13
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq %rdx, %rbp
; X64-NEXT: movq %rdx, %r14
; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: subq $1, %r13
; X64-NEXT: sbbq $0, %rbp
; X64-NEXT: movq %rbx, %rdi
; X64-NEXT: movq %r12, %rsi
; X64-NEXT: sbbq $0, %r14
; X64-NEXT: shrq $63, %rbx
; X64-NEXT: xorl %r15d, %ebx
; X64-NEXT: movq %r12, %rdi
; X64-NEXT: movq %rbp, %rsi
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
; X64-NEXT: movq %r14, %rcx
; X64-NEXT: movq %r15, %rcx
; X64-NEXT: callq __modti3
; X64-NEXT: orq %rax, %rdx
; X64-NEXT: setne %al
; X64-NEXT: shlq $31, %r15
; X64-NEXT: shrq $63, %r15
; X64-NEXT: xorl %r14d, %r15d
; X64-NEXT: testb %r15b, %al
; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload
; X64-NEXT: testb %bl, %al
; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload
; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload
; X64-NEXT: movl $4294967295, %edx # imm = 0xFFFFFFFF
; X64-NEXT: cmpq %rdx, %r13
; X64-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF
; X64-NEXT: cmovbq %r13, %rax
; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: testq %rbp, %rbp
; X64-NEXT: testq %r14, %r14
; X64-NEXT: cmovnsq %rdx, %r13
; X64-NEXT: cmoveq %rax, %r13
; X64-NEXT: cmovnsq %rcx, %rbp
; X64-NEXT: cmovnsq %rcx, %r14
; X64-NEXT: movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000
; X64-NEXT: cmpq %rcx, %r13
; X64-NEXT: movq %rcx, %rax
; X64-NEXT: cmovaq %r13, %rax
; X64-NEXT: testq %rbp, %rbp
; X64-NEXT: testq %r14, %r14
; X64-NEXT: cmovsq %rcx, %r13
; X64-NEXT: cmpq $-1, %rbp
; X64-NEXT: cmpq $-1, %r14
; X64-NEXT: cmoveq %rax, %r13
; X64-NEXT: movq %r13, %xmm0
; X64-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; X64-NEXT: pshufd $78, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; X64-NEXT: # xmm0 = mem[2,3,0,1]
; X64-NEXT: movq %xmm0, %rbp
; X64-NEXT: movq %rbp, %rbx
; X64-NEXT: sarq $63, %rbx
; X64-NEXT: movq %xmm0, %rbx
; X64-NEXT: movq %rbx, %r13
; X64-NEXT: shldq $31, %rbp, %r13
; X64-NEXT: sarq $63, %r13
; X64-NEXT: shldq $31, %rbx, %r13
; X64-NEXT: pshufd $78, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; X64-NEXT: # xmm0 = mem[2,3,0,1]
; X64-NEXT: movq %xmm0, %rdx
; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq %rdx, %r14
; X64-NEXT: sarq $63, %r14
; X64-NEXT: shlq $31, %rbp
; X64-NEXT: movq %rbp, %rdi
; X64-NEXT: movq %rdx, %rbp
; X64-NEXT: sarq $63, %rbp
; X64-NEXT: movq %rbx, %r15
; X64-NEXT: shlq $31, %r15
; X64-NEXT: movq %r15, %rdi
; X64-NEXT: movq %r13, %rsi
; X64-NEXT: movq %r14, %rcx
; X64-NEXT: movq %rbp, %rcx
; X64-NEXT: callq __divti3
; X64-NEXT: movq %rax, %r12
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq %rdx, %r15
; X64-NEXT: movq %rdx, %r14
; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: subq $1, %r12
; X64-NEXT: sbbq $0, %r15
; X64-NEXT: movq %rbp, %rdi
; X64-NEXT: sbbq $0, %r14
; X64-NEXT: shrq $63, %rbx
; X64-NEXT: xorl %ebp, %ebx
; X64-NEXT: movq %r15, %rdi
; X64-NEXT: movq %r13, %rsi
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
; X64-NEXT: movq %r14, %rcx
; X64-NEXT: movq %rbp, %rcx
; X64-NEXT: callq __modti3
; X64-NEXT: orq %rax, %rdx
; X64-NEXT: setne %al
; X64-NEXT: shlq $31, %rbx
; X64-NEXT: shrq $63, %rbx
; X64-NEXT: xorl %r14d, %ebx
; X64-NEXT: testb %bl, %al
; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload
; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload
; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Folded Reload
; X64-NEXT: movl $4294967295, %ecx # imm = 0xFFFFFFFF
; X64-NEXT: cmpq %rcx, %r12
; X64-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF
; X64-NEXT: cmovbq %r12, %rax
; X64-NEXT: testq %r15, %r15
; X64-NEXT: testq %r14, %r14
; X64-NEXT: cmovnsq %rcx, %r12
; X64-NEXT: cmoveq %rax, %r12
; X64-NEXT: movl $0, %eax
; X64-NEXT: cmovnsq %rax, %r15
; X64-NEXT: cmovnsq %rax, %r14
; X64-NEXT: movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000
; X64-NEXT: cmpq %rcx, %r12
; X64-NEXT: movq %rcx, %rax
; X64-NEXT: cmovaq %r12, %rax
; X64-NEXT: testq %r15, %r15
; X64-NEXT: testq %r14, %r14
; X64-NEXT: cmovsq %rcx, %r12
; X64-NEXT: cmpq $-1, %r15
; X64-NEXT: cmpq $-1, %r14
; X64-NEXT: cmoveq %rax, %r12
; X64-NEXT: movq %r12, %xmm0
; X64-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
@@ -718,11 +716,10 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X64-NEXT: paddq %xmm1, %xmm1
; X64-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; X64-NEXT: movq %xmm1, %rbp
; X64-NEXT: movq %rbp, %r14
; X64-NEXT: sarq $63, %r14
; X64-NEXT: movq %r14, %r13
; X64-NEXT: shldq $31, %rbp, %r13
; X64-NEXT: movq %xmm1, %rbx
; X64-NEXT: movq %rbx, %r12
; X64-NEXT: sarq $63, %r12
; X64-NEXT: shldq $31, %rbx, %r12
; X64-NEXT: pshufd $78, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; X64-NEXT: # xmm1 = mem[2,3,0,1]
; X64-NEXT: pxor %xmm0, %xmm0
@@ -731,104 +728,103 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; X64-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; X64-NEXT: movq %xmm1, %rdx
; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq %rdx, %rbx
; X64-NEXT: sarq $63, %rbx
; X64-NEXT: shlq $31, %rbp
; X64-NEXT: movq %rbp, %rdi
; X64-NEXT: movq %r13, %rsi
; X64-NEXT: movq %rbx, %rcx
; X64-NEXT: movq %rdx, %rbp
; X64-NEXT: sarq $63, %rbp
; X64-NEXT: movq %rbx, %r15
; X64-NEXT: shlq $31, %r15
; X64-NEXT: movq %r15, %rdi
; X64-NEXT: movq %r12, %rsi
; X64-NEXT: movq %rbp, %rcx
; X64-NEXT: callq __divti3
; X64-NEXT: movq %rax, %r12
; X64-NEXT: movq %rax, %r13
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq %rdx, %r15
; X64-NEXT: movq %rdx, %r14
; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: subq $1, %r12
; X64-NEXT: sbbq $0, %r15
; X64-NEXT: movq %rbp, %rdi
; X64-NEXT: movq %r13, %rsi
; X64-NEXT: subq $1, %r13
; X64-NEXT: sbbq $0, %r14
; X64-NEXT: shrq $63, %rbx
; X64-NEXT: xorl %ebp, %ebx
; X64-NEXT: movq %r15, %rdi
; X64-NEXT: movq %r12, %rsi
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
; X64-NEXT: movq %rbx, %rcx
; X64-NEXT: movq %rbp, %rcx
; X64-NEXT: callq __modti3
; X64-NEXT: orq %rax, %rdx
; X64-NEXT: setne %al
; X64-NEXT: shlq $31, %r14
; X64-NEXT: shrq $63, %r14
; X64-NEXT: xorl %ebx, %r14d
; X64-NEXT: testb %r14b, %al
; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload
; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Folded Reload
; X64-NEXT: testb %bl, %al
; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload
; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload
; X64-NEXT: movl $4294967295, %ecx # imm = 0xFFFFFFFF
; X64-NEXT: cmpq %rcx, %r12
; X64-NEXT: cmpq %rcx, %r13
; X64-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF
; X64-NEXT: cmovbq %r12, %rax
; X64-NEXT: testq %r15, %r15
; X64-NEXT: cmovnsq %rcx, %r12
; X64-NEXT: cmoveq %rax, %r12
; X64-NEXT: cmovbq %r13, %rax
; X64-NEXT: testq %r14, %r14
; X64-NEXT: cmovnsq %rcx, %r13
; X64-NEXT: cmoveq %rax, %r13
; X64-NEXT: movl $0, %eax
; X64-NEXT: cmovnsq %rax, %r15
; X64-NEXT: cmovnsq %rax, %r14
; X64-NEXT: movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000
; X64-NEXT: cmpq %rcx, %r12
; X64-NEXT: cmpq %rcx, %r13
; X64-NEXT: movq %rcx, %rax
; X64-NEXT: cmovaq %r12, %rax
; X64-NEXT: testq %r15, %r15
; X64-NEXT: cmovsq %rcx, %r12
; X64-NEXT: cmpq $-1, %r15
; X64-NEXT: cmoveq %rax, %r12
; X64-NEXT: movq %r12, %xmm0
; X64-NEXT: cmovaq %r13, %rax
; X64-NEXT: testq %r14, %r14
; X64-NEXT: cmovsq %rcx, %r13
; X64-NEXT: cmpq $-1, %r14
; X64-NEXT: cmoveq %rax, %r13
; X64-NEXT: movq %r13, %xmm0
; X64-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; X64-NEXT: pshufd $78, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; X64-NEXT: # xmm0 = mem[2,3,0,1]
; X64-NEXT: movq %xmm0, %rbp
; X64-NEXT: movq %rbp, %rbx
; X64-NEXT: sarq $63, %rbx
; X64-NEXT: movq %xmm0, %rbx
; X64-NEXT: movq %rbx, %r13
; X64-NEXT: shldq $31, %rbp, %r13
; X64-NEXT: sarq $63, %r13
; X64-NEXT: shldq $31, %rbx, %r13
; X64-NEXT: pshufd $78, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; X64-NEXT: # xmm0 = mem[2,3,0,1]
; X64-NEXT: movq %xmm0, %rdx
; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq %rdx, %r14
; X64-NEXT: sarq $63, %r14
; X64-NEXT: shlq $31, %rbp
; X64-NEXT: movq %rbp, %rdi
; X64-NEXT: movq %rdx, %rbp
; X64-NEXT: sarq $63, %rbp
; X64-NEXT: movq %rbx, %r15
; X64-NEXT: shlq $31, %r15
; X64-NEXT: movq %r15, %rdi
; X64-NEXT: movq %r13, %rsi
; X64-NEXT: movq %r14, %rcx
; X64-NEXT: movq %rbp, %rcx
; X64-NEXT: callq __divti3
; X64-NEXT: movq %rax, %r12
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq %rdx, %r15
; X64-NEXT: movq %rdx, %r14
; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: subq $1, %r12
; X64-NEXT: sbbq $0, %r15
; X64-NEXT: movq %rbp, %rdi
; X64-NEXT: sbbq $0, %r14
; X64-NEXT: shrq $63, %rbx
; X64-NEXT: xorl %ebp, %ebx
; X64-NEXT: movq %r15, %rdi
; X64-NEXT: movq %r13, %rsi
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
; X64-NEXT: movq %r14, %rcx
; X64-NEXT: movq %rbp, %rcx
; X64-NEXT: callq __modti3
; X64-NEXT: orq %rax, %rdx
; X64-NEXT: setne %al
; X64-NEXT: shlq $31, %rbx
; X64-NEXT: shrq $63, %rbx
; X64-NEXT: xorl %r14d, %ebx
; X64-NEXT: testb %bl, %al
; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload
; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload
; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Folded Reload
; X64-NEXT: movl $4294967295, %ecx # imm = 0xFFFFFFFF
; X64-NEXT: cmpq %rcx, %r12
; X64-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF
; X64-NEXT: cmovbq %r12, %rax
; X64-NEXT: testq %r15, %r15
; X64-NEXT: testq %r14, %r14
; X64-NEXT: cmovnsq %rcx, %r12
; X64-NEXT: cmoveq %rax, %r12
; X64-NEXT: movl $0, %eax
; X64-NEXT: cmovnsq %rax, %r15
; X64-NEXT: cmovnsq %rax, %r14
; X64-NEXT: movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000
; X64-NEXT: cmpq %rcx, %r12
; X64-NEXT: movq %rcx, %rax
; X64-NEXT: cmovaq %r12, %rax
; X64-NEXT: testq %r15, %r15
; X64-NEXT: testq %r14, %r14
; X64-NEXT: cmovsq %rcx, %r12
; X64-NEXT: cmpq $-1, %r15
; X64-NEXT: cmpq $-1, %r14
; X64-NEXT: cmoveq %rax, %r12
; X64-NEXT: movq %r12, %xmm0
; X64-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload


@@ -59,7 +59,7 @@ define <4 x i16> @smulfixsat(<4 x i16> %a) {
; CHECK-NEXT: pextrw $1, %xmm0, %esi
; CHECK-NEXT: movswl %si, %edi
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: shrl $15, %eax
; CHECK-NEXT: shrl $16, %eax
; CHECK-NEXT: leal (%rdi,%rdi), %esi
; CHECK-NEXT: shrdw $15, %ax, %si
; CHECK-NEXT: sarl $15, %edi


@@ -1577,7 +1577,6 @@ define i1 @icmp_v32i16_v32i1(<32 x i16>) {
; AVX1-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
; AVX1-NEXT: cmpw $-1, %ax
; AVX1-NEXT: sete %al


@@ -1566,7 +1566,6 @@ define i1 @icmp_v32i16_v32i1(<32 x i16>) {
; AVX1-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
; AVX1-NEXT: testw %ax, %ax
; AVX1-NEXT: setne %al
@@ -1657,7 +1656,6 @@ define i1 @icmp_v64i8_v64i1(<64 x i8>) {
; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpor %xmm0, %xmm3, %xmm0
; AVX1-NEXT: vpor %xmm0, %xmm4, %xmm0
; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
; AVX1-NEXT: testw %ax, %ax
; AVX1-NEXT: setne %al