mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 12:41:49 +01:00
[DAG] PromoteIntRes_ADDSUBSHLSAT - use promoted ISD::USUBSAT directly
As discussed on D96413, as long as the promoted bits of the args are zero we can use the basic ISD::USUBSAT pattern directly, without the shifting like we do for other ops. I think something similar should be possible for ISD::UADDSAT as well, which I'll look at later. Also, create an ISD::USUBSAT node directly - this will be expanded back by the legalizer later on if necessary. Differential Revision: https://reviews.llvm.org/D96622
This commit is contained in:
parent
443904009e
commit
a0306a3243
@ -772,6 +772,11 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSHLSAT(SDNode *N) {
|
||||
EVT PromotedType = Op1Promoted.getValueType();
|
||||
unsigned NewBits = PromotedType.getScalarSizeInBits();
|
||||
|
||||
// USUBSAT can always be promoted as long as we have zero-extended the args.
|
||||
if (Opcode == ISD::USUBSAT)
|
||||
return DAG.getNode(ISD::USUBSAT, dl, PromotedType, Op1Promoted,
|
||||
Op2Promoted);
|
||||
|
||||
// Shift cannot use a min/max expansion, we can't detect overflow if all of
|
||||
// the bits have been shifted out.
|
||||
if (IsShift || TLI.isOperationLegalOrCustom(Opcode, PromotedType)) {
|
||||
@ -783,7 +788,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSHLSAT(SDNode *N) {
|
||||
ShiftOp = ISD::SRA;
|
||||
break;
|
||||
case ISD::UADDSAT:
|
||||
case ISD::USUBSAT:
|
||||
case ISD::USHLSAT:
|
||||
ShiftOp = ISD::SRL;
|
||||
break;
|
||||
@ -806,12 +810,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSHLSAT(SDNode *N) {
|
||||
return DAG.getNode(ShiftOp, dl, PromotedType, Result, ShiftAmount);
|
||||
}
|
||||
|
||||
if (Opcode == ISD::USUBSAT) {
|
||||
SDValue Max =
|
||||
DAG.getNode(ISD::UMAX, dl, PromotedType, Op1Promoted, Op2Promoted);
|
||||
return DAG.getNode(ISD::SUB, dl, PromotedType, Max, Op2Promoted);
|
||||
}
|
||||
|
||||
if (Opcode == ISD::UADDSAT) {
|
||||
APInt MaxVal = APInt::getAllOnesValue(OldBits).zext(NewBits);
|
||||
SDValue SatMax = DAG.getConstant(MaxVal, dl, PromotedType);
|
||||
|
@ -129,10 +129,7 @@ define void @v4i8(<4 x i8>* %px, <4 x i8>* %py, <4 x i8>* %pz) nounwind {
|
||||
; CHECK-NEXT: mov v1.h[2], w9
|
||||
; CHECK-NEXT: mov v0.h[3], w10
|
||||
; CHECK-NEXT: mov v1.h[3], w11
|
||||
; CHECK-NEXT: shl v1.4h, v1.4h, #8
|
||||
; CHECK-NEXT: shl v0.4h, v0.4h, #8
|
||||
; CHECK-NEXT: uqsub v0.4h, v0.4h, v1.4h
|
||||
; CHECK-NEXT: ushr v0.4h, v0.4h, #8
|
||||
; CHECK-NEXT: xtn v0.8b, v0.8h
|
||||
; CHECK-NEXT: str s0, [x2]
|
||||
; CHECK-NEXT: ret
|
||||
@ -154,10 +151,7 @@ define void @v2i8(<2 x i8>* %px, <2 x i8>* %py, <2 x i8>* %pz) nounwind {
|
||||
; CHECK-NEXT: fmov s1, w9
|
||||
; CHECK-NEXT: mov v0.s[1], w10
|
||||
; CHECK-NEXT: mov v1.s[1], w11
|
||||
; CHECK-NEXT: shl v1.2s, v1.2s, #24
|
||||
; CHECK-NEXT: shl v0.2s, v0.2s, #24
|
||||
; CHECK-NEXT: uqsub v0.2s, v0.2s, v1.2s
|
||||
; CHECK-NEXT: ushr v0.2s, v0.2s, #24
|
||||
; CHECK-NEXT: mov w8, v0.s[1]
|
||||
; CHECK-NEXT: fmov w9, s0
|
||||
; CHECK-NEXT: strb w9, [x2]
|
||||
@ -196,10 +190,7 @@ define void @v2i16(<2 x i16>* %px, <2 x i16>* %py, <2 x i16>* %pz) nounwind {
|
||||
; CHECK-NEXT: fmov s1, w9
|
||||
; CHECK-NEXT: mov v0.s[1], w10
|
||||
; CHECK-NEXT: mov v1.s[1], w11
|
||||
; CHECK-NEXT: shl v1.2s, v1.2s, #16
|
||||
; CHECK-NEXT: shl v0.2s, v0.2s, #16
|
||||
; CHECK-NEXT: uqsub v0.2s, v0.2s, v1.2s
|
||||
; CHECK-NEXT: ushr v0.2s, v0.2s, #16
|
||||
; CHECK-NEXT: mov w8, v0.s[1]
|
||||
; CHECK-NEXT: fmov w9, s0
|
||||
; CHECK-NEXT: strh w9, [x2]
|
||||
@ -272,12 +263,9 @@ define <16 x i4> @v16i4(<16 x i4> %x, <16 x i4> %y) nounwind {
|
||||
; CHECK-LABEL: v16i4:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: movi v2.16b, #15
|
||||
; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
|
||||
; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
|
||||
; CHECK-NEXT: shl v1.16b, v1.16b, #4
|
||||
; CHECK-NEXT: shl v0.16b, v0.16b, #4
|
||||
; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
|
||||
; CHECK-NEXT: uqsub v0.16b, v0.16b, v1.16b
|
||||
; CHECK-NEXT: ushr v0.16b, v0.16b, #4
|
||||
; CHECK-NEXT: ret
|
||||
%z = call <16 x i4> @llvm.usub.sat.v16i4(<16 x i4> %x, <16 x i4> %y)
|
||||
ret <16 x i4> %z
|
||||
@ -287,12 +275,9 @@ define <16 x i1> @v16i1(<16 x i1> %x, <16 x i1> %y) nounwind {
|
||||
; CHECK-LABEL: v16i1:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: movi v2.16b, #1
|
||||
; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
|
||||
; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
|
||||
; CHECK-NEXT: shl v1.16b, v1.16b, #7
|
||||
; CHECK-NEXT: shl v0.16b, v0.16b, #7
|
||||
; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
|
||||
; CHECK-NEXT: uqsub v0.16b, v0.16b, v1.16b
|
||||
; CHECK-NEXT: ushr v0.16b, v0.16b, #7
|
||||
; CHECK-NEXT: ret
|
||||
%z = call <16 x i1> @llvm.usub.sat.v16i1(<16 x i1> %x, <16 x i1> %y)
|
||||
ret <16 x i1> %z
|
||||
|
@ -17,19 +17,13 @@ define i8 @v_usubsat_i8(i8 %lhs, i8 %rhs) {
|
||||
; GFX8-LABEL: v_usubsat_i8:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: v_lshlrev_b16_e32 v1, 8, v1
|
||||
; GFX8-NEXT: v_lshlrev_b16_e32 v0, 8, v0
|
||||
; GFX8-NEXT: v_sub_u16_e64 v0, v0, v1 clamp
|
||||
; GFX8-NEXT: v_lshrrev_b16_e32 v0, 8, v0
|
||||
; GFX8-NEXT: v_sub_u16_sdwa v0, v0, v1 clamp dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX9-LABEL: v_usubsat_i8:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v1
|
||||
; GFX9-NEXT: v_lshlrev_b16_e32 v0, 8, v0
|
||||
; GFX9-NEXT: v_sub_u16_e64 v0, v0, v1 clamp
|
||||
; GFX9-NEXT: v_lshrrev_b16_e32 v0, 8, v0
|
||||
; GFX9-NEXT: v_sub_u16_sdwa v0, v0, v1 clamp dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%result = call i8 @llvm.usub.sat.i8(i8 %lhs, i8 %rhs)
|
||||
ret i8 %result
|
||||
|
@ -104,34 +104,31 @@ define zeroext i16 @func16(i16 zeroext %x, i16 zeroext %y, i16 zeroext %z) nounw
|
||||
; CHECK-T1-LABEL: func16:
|
||||
; CHECK-T1: @ %bb.0:
|
||||
; CHECK-T1-NEXT: muls r1, r2, r1
|
||||
; CHECK-T1-NEXT: uxth r2, r1
|
||||
; CHECK-T1-NEXT: cmp r0, r2
|
||||
; CHECK-T1-NEXT: bhi .LBB2_2
|
||||
; CHECK-T1-NEXT: @ %bb.1:
|
||||
; CHECK-T1-NEXT: mov r0, r2
|
||||
; CHECK-T1-NEXT: .LBB2_2:
|
||||
; CHECK-T1-NEXT: uxth r1, r1
|
||||
; CHECK-T1-NEXT: subs r0, r0, r1
|
||||
; CHECK-T1-NEXT: bhs .LBB2_2
|
||||
; CHECK-T1-NEXT: @ %bb.1:
|
||||
; CHECK-T1-NEXT: movs r0, #0
|
||||
; CHECK-T1-NEXT: .LBB2_2:
|
||||
; CHECK-T1-NEXT: uxth r0, r0
|
||||
; CHECK-T1-NEXT: bx lr
|
||||
;
|
||||
; CHECK-T2-LABEL: func16:
|
||||
; CHECK-T2: @ %bb.0:
|
||||
; CHECK-T2-NEXT: mul r3, r1, r2
|
||||
; CHECK-T2-NEXT: uxth r3, r3
|
||||
; CHECK-T2-NEXT: cmp r0, r3
|
||||
; CHECK-T2-NEXT: it hi
|
||||
; CHECK-T2-NEXT: movhi r3, r0
|
||||
; CHECK-T2-NEXT: mls r0, r1, r2, r3
|
||||
; CHECK-T2-NEXT: muls r1, r2, r1
|
||||
; CHECK-T2-NEXT: uxth r1, r1
|
||||
; CHECK-T2-NEXT: subs r0, r0, r1
|
||||
; CHECK-T2-NEXT: it lo
|
||||
; CHECK-T2-NEXT: movlo r0, #0
|
||||
; CHECK-T2-NEXT: uxth r0, r0
|
||||
; CHECK-T2-NEXT: bx lr
|
||||
;
|
||||
; CHECK-ARM-LABEL: func16:
|
||||
; CHECK-ARM: @ %bb.0:
|
||||
; CHECK-ARM-NEXT: mul r3, r1, r2
|
||||
; CHECK-ARM-NEXT: uxth r3, r3
|
||||
; CHECK-ARM-NEXT: cmp r0, r3
|
||||
; CHECK-ARM-NEXT: movhi r3, r0
|
||||
; CHECK-ARM-NEXT: mls r0, r1, r2, r3
|
||||
; CHECK-ARM-NEXT: mul r1, r1, r2
|
||||
; CHECK-ARM-NEXT: uxth r1, r1
|
||||
; CHECK-ARM-NEXT: subs r0, r0, r1
|
||||
; CHECK-ARM-NEXT: movlo r0, #0
|
||||
; CHECK-ARM-NEXT: uxth r0, r0
|
||||
; CHECK-ARM-NEXT: bx lr
|
||||
%a = mul i16 %y, %z
|
||||
@ -143,34 +140,31 @@ define zeroext i8 @func8(i8 zeroext %x, i8 zeroext %y, i8 zeroext %z) nounwind {
|
||||
; CHECK-T1-LABEL: func8:
|
||||
; CHECK-T1: @ %bb.0:
|
||||
; CHECK-T1-NEXT: muls r1, r2, r1
|
||||
; CHECK-T1-NEXT: uxtb r2, r1
|
||||
; CHECK-T1-NEXT: cmp r0, r2
|
||||
; CHECK-T1-NEXT: bhi .LBB3_2
|
||||
; CHECK-T1-NEXT: @ %bb.1:
|
||||
; CHECK-T1-NEXT: mov r0, r2
|
||||
; CHECK-T1-NEXT: .LBB3_2:
|
||||
; CHECK-T1-NEXT: uxtb r1, r1
|
||||
; CHECK-T1-NEXT: subs r0, r0, r1
|
||||
; CHECK-T1-NEXT: bhs .LBB3_2
|
||||
; CHECK-T1-NEXT: @ %bb.1:
|
||||
; CHECK-T1-NEXT: movs r0, #0
|
||||
; CHECK-T1-NEXT: .LBB3_2:
|
||||
; CHECK-T1-NEXT: uxtb r0, r0
|
||||
; CHECK-T1-NEXT: bx lr
|
||||
;
|
||||
; CHECK-T2-LABEL: func8:
|
||||
; CHECK-T2: @ %bb.0:
|
||||
; CHECK-T2-NEXT: mul r3, r1, r2
|
||||
; CHECK-T2-NEXT: uxtb r3, r3
|
||||
; CHECK-T2-NEXT: cmp r0, r3
|
||||
; CHECK-T2-NEXT: it hi
|
||||
; CHECK-T2-NEXT: movhi r3, r0
|
||||
; CHECK-T2-NEXT: mls r0, r1, r2, r3
|
||||
; CHECK-T2-NEXT: muls r1, r2, r1
|
||||
; CHECK-T2-NEXT: uxtb r1, r1
|
||||
; CHECK-T2-NEXT: subs r0, r0, r1
|
||||
; CHECK-T2-NEXT: it lo
|
||||
; CHECK-T2-NEXT: movlo r0, #0
|
||||
; CHECK-T2-NEXT: uxtb r0, r0
|
||||
; CHECK-T2-NEXT: bx lr
|
||||
;
|
||||
; CHECK-ARM-LABEL: func8:
|
||||
; CHECK-ARM: @ %bb.0:
|
||||
; CHECK-ARM-NEXT: smulbb r3, r1, r2
|
||||
; CHECK-ARM-NEXT: uxtb r3, r3
|
||||
; CHECK-ARM-NEXT: cmp r0, r3
|
||||
; CHECK-ARM-NEXT: movhi r3, r0
|
||||
; CHECK-ARM-NEXT: mls r0, r1, r2, r3
|
||||
; CHECK-ARM-NEXT: smulbb r1, r1, r2
|
||||
; CHECK-ARM-NEXT: uxtb r1, r1
|
||||
; CHECK-ARM-NEXT: subs r0, r0, r1
|
||||
; CHECK-ARM-NEXT: movlo r0, #0
|
||||
; CHECK-ARM-NEXT: uxtb r0, r0
|
||||
; CHECK-ARM-NEXT: bx lr
|
||||
%a = mul i8 %y, %z
|
||||
@ -183,35 +177,31 @@ define zeroext i4 @func4(i4 zeroext %x, i4 zeroext %y, i4 zeroext %z) nounwind {
|
||||
; CHECK-T1: @ %bb.0:
|
||||
; CHECK-T1-NEXT: muls r1, r2, r1
|
||||
; CHECK-T1-NEXT: movs r2, #15
|
||||
; CHECK-T1-NEXT: mov r3, r1
|
||||
; CHECK-T1-NEXT: ands r3, r2
|
||||
; CHECK-T1-NEXT: cmp r0, r3
|
||||
; CHECK-T1-NEXT: bhi .LBB4_2
|
||||
; CHECK-T1-NEXT: @ %bb.1:
|
||||
; CHECK-T1-NEXT: mov r0, r3
|
||||
; CHECK-T1-NEXT: .LBB4_2:
|
||||
; CHECK-T1-NEXT: ands r1, r2
|
||||
; CHECK-T1-NEXT: subs r0, r0, r1
|
||||
; CHECK-T1-NEXT: bhs .LBB4_2
|
||||
; CHECK-T1-NEXT: @ %bb.1:
|
||||
; CHECK-T1-NEXT: movs r0, #0
|
||||
; CHECK-T1-NEXT: .LBB4_2:
|
||||
; CHECK-T1-NEXT: ands r0, r2
|
||||
; CHECK-T1-NEXT: bx lr
|
||||
;
|
||||
; CHECK-T2-LABEL: func4:
|
||||
; CHECK-T2: @ %bb.0:
|
||||
; CHECK-T2-NEXT: mul r3, r1, r2
|
||||
; CHECK-T2-NEXT: and r3, r3, #15
|
||||
; CHECK-T2-NEXT: cmp r0, r3
|
||||
; CHECK-T2-NEXT: it hi
|
||||
; CHECK-T2-NEXT: movhi r3, r0
|
||||
; CHECK-T2-NEXT: mls r0, r1, r2, r3
|
||||
; CHECK-T2-NEXT: muls r1, r2, r1
|
||||
; CHECK-T2-NEXT: and r1, r1, #15
|
||||
; CHECK-T2-NEXT: subs r0, r0, r1
|
||||
; CHECK-T2-NEXT: it lo
|
||||
; CHECK-T2-NEXT: movlo r0, #0
|
||||
; CHECK-T2-NEXT: and r0, r0, #15
|
||||
; CHECK-T2-NEXT: bx lr
|
||||
;
|
||||
; CHECK-ARM-LABEL: func4:
|
||||
; CHECK-ARM: @ %bb.0:
|
||||
; CHECK-ARM-NEXT: smulbb r3, r1, r2
|
||||
; CHECK-ARM-NEXT: and r3, r3, #15
|
||||
; CHECK-ARM-NEXT: cmp r0, r3
|
||||
; CHECK-ARM-NEXT: movhi r3, r0
|
||||
; CHECK-ARM-NEXT: mls r0, r1, r2, r3
|
||||
; CHECK-ARM-NEXT: smulbb r1, r1, r2
|
||||
; CHECK-ARM-NEXT: and r1, r1, #15
|
||||
; CHECK-ARM-NEXT: subs r0, r0, r1
|
||||
; CHECK-ARM-NEXT: movlo r0, #0
|
||||
; CHECK-ARM-NEXT: and r0, r0, #15
|
||||
; CHECK-ARM-NEXT: bx lr
|
||||
%a = mul i4 %y, %z
|
||||
|
@ -111,22 +111,15 @@ define zeroext i8 @func8(i8 zeroext %x, i8 zeroext %y, i8 zeroext %z) nounwind {
|
||||
define zeroext i4 @func4(i4 zeroext %x, i4 zeroext %y, i4 zeroext %z) nounwind {
|
||||
; X86-LABEL: func4:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: pushl %ebx
|
||||
; X86-NEXT: pushl %esi
|
||||
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
|
||||
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
|
||||
; X86-NEXT: mulb {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: movl %eax, %edx
|
||||
; X86-NEXT: andb $15, %dl
|
||||
; X86-NEXT: movzbl %dl, %esi
|
||||
; X86-NEXT: movzbl %cl, %ebx
|
||||
; X86-NEXT: cmpb %dl, %cl
|
||||
; X86-NEXT: cmovbel %esi, %ebx
|
||||
; X86-NEXT: subb %al, %bl
|
||||
; X86-NEXT: movzbl %bl, %eax
|
||||
; X86-NEXT: andb $15, %al
|
||||
; X86-NEXT: xorl %edx, %edx
|
||||
; X86-NEXT: subb %al, %cl
|
||||
; X86-NEXT: movzbl %cl, %eax
|
||||
; X86-NEXT: cmovbl %edx, %eax
|
||||
; X86-NEXT: andl $15, %eax
|
||||
; X86-NEXT: popl %esi
|
||||
; X86-NEXT: popl %ebx
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: func4:
|
||||
@ -134,13 +127,11 @@ define zeroext i4 @func4(i4 zeroext %x, i4 zeroext %y, i4 zeroext %z) nounwind {
|
||||
; X64-NEXT: movl %esi, %eax
|
||||
; X64-NEXT: # kill: def $al killed $al killed $eax
|
||||
; X64-NEXT: mulb %dl
|
||||
; X64-NEXT: movl %eax, %ecx
|
||||
; X64-NEXT: andb $15, %cl
|
||||
; X64-NEXT: movzbl %cl, %ecx
|
||||
; X64-NEXT: cmpb %cl, %dil
|
||||
; X64-NEXT: cmoval %edi, %ecx
|
||||
; X64-NEXT: subb %al, %cl
|
||||
; X64-NEXT: movzbl %cl, %eax
|
||||
; X64-NEXT: andb $15, %al
|
||||
; X64-NEXT: xorl %ecx, %ecx
|
||||
; X64-NEXT: subb %al, %dil
|
||||
; X64-NEXT: movzbl %dil, %eax
|
||||
; X64-NEXT: cmovbl %ecx, %eax
|
||||
; X64-NEXT: andl $15, %eax
|
||||
; X64-NEXT: retq
|
||||
%a = mul i4 %y, %z
|
||||
|
@ -481,26 +481,18 @@ define void @v1i16(<1 x i16>* %px, <1 x i16>* %py, <1 x i16>* %pz) nounwind {
|
||||
define <16 x i4> @v16i4(<16 x i4> %x, <16 x i4> %y) nounwind {
|
||||
; SSE-LABEL: v16i4:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: psllw $4, %xmm1
|
||||
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
|
||||
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
|
||||
; SSE-NEXT: pand %xmm2, %xmm1
|
||||
; SSE-NEXT: psllw $4, %xmm0
|
||||
; SSE-NEXT: pand %xmm2, %xmm0
|
||||
; SSE-NEXT: psubusb %xmm1, %xmm0
|
||||
; SSE-NEXT: psrlw $4, %xmm0
|
||||
; SSE-NEXT: pand {{.*}}(%rip), %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: v16i4:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vpsllw $4, %xmm1, %xmm1
|
||||
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
|
||||
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
|
||||
; AVX-NEXT: vpand %xmm2, %xmm1, %xmm1
|
||||
; AVX-NEXT: vpsllw $4, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpand %xmm2, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpsubusb %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpsrlw $4, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
%z = call <16 x i4> @llvm.usub.sat.v16i4(<16 x i4> %x, <16 x i4> %y)
|
||||
ret <16 x i4> %z
|
||||
@ -509,38 +501,26 @@ define <16 x i4> @v16i4(<16 x i4> %x, <16 x i4> %y) nounwind {
|
||||
define <16 x i1> @v16i1(<16 x i1> %x, <16 x i1> %y) nounwind {
|
||||
; SSE-LABEL: v16i1:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: psllw $7, %xmm1
|
||||
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
|
||||
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
|
||||
; SSE-NEXT: pand %xmm2, %xmm1
|
||||
; SSE-NEXT: psllw $7, %xmm0
|
||||
; SSE-NEXT: pand %xmm2, %xmm0
|
||||
; SSE-NEXT: psubusb %xmm1, %xmm0
|
||||
; SSE-NEXT: psrlw $7, %xmm0
|
||||
; SSE-NEXT: pand {{.*}}(%rip), %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: v16i1:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vpsllw $7, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
|
||||
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
|
||||
; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpsubusb %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpsrlw $7, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: v16i1:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpsllw $7, %xmm1, %xmm1
|
||||
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
|
||||
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
|
||||
; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
|
||||
; AVX2-NEXT: vpsllw $7, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpsubusb %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpsrlw $7, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512F-LABEL: v16i1:
|
||||
|
Loading…
x
Reference in New Issue
Block a user