mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
Revert DAGCombine "hoist binop with const" folds
Appear to introduce test-suite compile-time hang. http://lab.llvm.org:8011/builders/clang-cmake-x86_64-sde-avx512-linux/builds/22825 This reverts r361852,r361853,r361854,r361855,r361856 llvm-svn: 361865
This commit is contained in:
parent
880b46e5b8
commit
7040f72aca
@ -2303,13 +2303,6 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
|
||||
}
|
||||
}
|
||||
|
||||
// (x - y) + -1 -> add (xor y, -1), x
|
||||
if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
|
||||
isAllOnesOrAllOnesSplat(N1)) {
|
||||
SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), N1);
|
||||
return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
|
||||
}
|
||||
|
||||
if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
|
||||
return Combined;
|
||||
|
||||
@ -2461,14 +2454,6 @@ SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
|
||||
if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
|
||||
return V;
|
||||
|
||||
// Hoist one-use subtraction by constant: (x - C) + y -> (x + y) - C
|
||||
// This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
|
||||
if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
|
||||
isConstantOrConstantVector(N0.getOperand(1))) {
|
||||
SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
|
||||
return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
|
||||
}
|
||||
|
||||
// If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
|
||||
// rather than 'add 0/-1' (the zext should get folded).
|
||||
// add (sext i1 Y), X --> sub X, (zext i1 Y)
|
||||
@ -2938,33 +2923,6 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
|
||||
if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
|
||||
return V;
|
||||
|
||||
// (x - y) - 1 -> add (xor y, -1), x
|
||||
if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && isOneOrOneSplat(N1)) {
|
||||
SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
|
||||
DAG.getAllOnesConstant(DL, VT));
|
||||
return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
|
||||
}
|
||||
|
||||
// Hoist one-use addition by constant: (x + C) - y -> (x - y) + C
|
||||
if (N0.hasOneUse() && N0.getOpcode() == ISD::ADD &&
|
||||
isConstantOrConstantVector(N0.getOperand(1))) {
|
||||
SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
|
||||
return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
|
||||
}
|
||||
// y - (x + C) -> (y - x) - C
|
||||
if (N1.hasOneUse() && N1.getOpcode() == ISD::ADD &&
|
||||
isConstantOrConstantVector(N1.getOperand(1))) {
|
||||
SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
|
||||
return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
|
||||
}
|
||||
// (x - C) - y -> (x - y) - C
|
||||
// This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
|
||||
if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
|
||||
isConstantOrConstantVector(N0.getOperand(1))) {
|
||||
SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
|
||||
return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
|
||||
}
|
||||
|
||||
// If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
|
||||
// rather than 'sub 0/1' (the sext should get folded).
|
||||
// sub X, (zext i1 Y) --> add X, (sext i1 Y)
|
||||
|
@ -486,7 +486,8 @@ define i64 @reg64_lshr_by_negated_unfolded(i64 %val, i64 %shamt) nounwind {
|
||||
define i32 @reg32_lshr_by_negated_unfolded_sub_b(i32 %val, i32 %a, i32 %b) nounwind {
|
||||
; CHECK-LABEL: reg32_lshr_by_negated_unfolded_sub_b:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: neg w8, w1
|
||||
; CHECK-NEXT: mov w8, #32
|
||||
; CHECK-NEXT: sub w8, w8, w1
|
||||
; CHECK-NEXT: sub w8, w8, w2
|
||||
; CHECK-NEXT: lsr w0, w0, w8
|
||||
; CHECK-NEXT: ret
|
||||
@ -499,7 +500,8 @@ define i32 @reg32_lshr_by_negated_unfolded_sub_b(i32 %val, i32 %a, i32 %b) nounw
|
||||
define i64 @reg64_lshr_by_negated_unfolded_sub_b(i64 %val, i64 %a, i64 %b) nounwind {
|
||||
; CHECK-LABEL: reg64_lshr_by_negated_unfolded_sub_b:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: neg x8, x1
|
||||
; CHECK-NEXT: mov w8, #64
|
||||
; CHECK-NEXT: sub x8, x8, x1
|
||||
; CHECK-NEXT: sub x8, x8, x2
|
||||
; CHECK-NEXT: lsr x0, x0, x8
|
||||
; CHECK-NEXT: ret
|
||||
@ -513,7 +515,7 @@ define i64 @reg64_lshr_by_negated_unfolded_sub_b(i64 %val, i64 %a, i64 %b) nounw
|
||||
define i32 @reg32_lshr_by_b_sub_negated_unfolded(i32 %val, i32 %a, i32 %b) nounwind {
|
||||
; CHECK-LABEL: reg32_lshr_by_b_sub_negated_unfolded:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: add w8, w2, w1
|
||||
; CHECK-NEXT: add w8, w1, w2
|
||||
; CHECK-NEXT: lsr w0, w0, w8
|
||||
; CHECK-NEXT: ret
|
||||
%nega = sub i32 0, %a
|
||||
@ -525,7 +527,7 @@ define i32 @reg32_lshr_by_b_sub_negated_unfolded(i32 %val, i32 %a, i32 %b) nounw
|
||||
define i64 @reg64_lshr_by_b_sub_negated_unfolded(i64 %val, i64 %a, i64 %b) nounwind {
|
||||
; CHECK-LABEL: reg64_lshr_by_b_sub_negated_unfolded:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: add x8, x2, x1
|
||||
; CHECK-NEXT: add x8, x1, x2
|
||||
; CHECK-NEXT: lsr x0, x0, x8
|
||||
; CHECK-NEXT: ret
|
||||
%nega = sub i64 0, %a
|
||||
|
@ -96,8 +96,8 @@ define i32 @sink_add_of_const_to_sub(i32 %a, i32 %b, i32 %c) {
|
||||
; CHECK-LABEL: sink_add_of_const_to_sub:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: sub w8, w0, w1
|
||||
; CHECK-NEXT: sub w8, w8, w2
|
||||
; CHECK-NEXT: add w0, w8, #32 // =32
|
||||
; CHECK-NEXT: add w8, w8, #32 // =32
|
||||
; CHECK-NEXT: sub w0, w8, w2
|
||||
; CHECK-NEXT: ret
|
||||
%t0 = sub i32 %a, %b
|
||||
%t1 = add i32 %t0, 32 ; constant always on RHS
|
||||
@ -107,9 +107,9 @@ define i32 @sink_add_of_const_to_sub(i32 %a, i32 %b, i32 %c) {
|
||||
define i32 @sink_add_of_const_to_sub2(i32 %a, i32 %b, i32 %c) {
|
||||
; CHECK-LABEL: sink_add_of_const_to_sub2:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: sub w8, w1, w0
|
||||
; CHECK-NEXT: add w8, w2, w8
|
||||
; CHECK-NEXT: sub w0, w8, #32 // =32
|
||||
; CHECK-NEXT: sub w8, w0, w1
|
||||
; CHECK-NEXT: add w8, w8, #32 // =32
|
||||
; CHECK-NEXT: sub w0, w2, w8
|
||||
; CHECK-NEXT: ret
|
||||
%t0 = sub i32 %a, %b
|
||||
%t1 = add i32 %t0, 32 ; constant always on RHS
|
||||
@ -124,8 +124,8 @@ define i32 @sink_sub_of_const_to_sub(i32 %a, i32 %b, i32 %c) {
|
||||
; CHECK-LABEL: sink_sub_of_const_to_sub:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: sub w8, w0, w1
|
||||
; CHECK-NEXT: sub w8, w8, w2
|
||||
; CHECK-NEXT: sub w0, w8, #32 // =32
|
||||
; CHECK-NEXT: sub w8, w8, #32 // =32
|
||||
; CHECK-NEXT: sub w0, w8, w2
|
||||
; CHECK-NEXT: ret
|
||||
%t0 = sub i32 %a, %b
|
||||
%t1 = sub i32 %t0, 32
|
||||
@ -152,8 +152,8 @@ define i32 @sink_sub_from_const_to_sub(i32 %a, i32 %b, i32 %c) {
|
||||
; CHECK-LABEL: sink_sub_from_const_to_sub:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: sub w8, w1, w0
|
||||
; CHECK-NEXT: sub w8, w8, w2
|
||||
; CHECK-NEXT: add w0, w8, #32 // =32
|
||||
; CHECK-NEXT: add w8, w8, #32 // =32
|
||||
; CHECK-NEXT: sub w0, w8, w2
|
||||
; CHECK-NEXT: ret
|
||||
%t0 = sub i32 %a, %b
|
||||
%t1 = sub i32 32, %t0
|
||||
@ -218,8 +218,8 @@ define <4 x i32> @vec_sink_sub_of_const_to_add0(<4 x i32> %a, <4 x i32> %b, <4 x
|
||||
; CHECK-NEXT: adrp x8, .LCPI14_0
|
||||
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI14_0]
|
||||
; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
|
||||
; CHECK-NEXT: add v0.4s, v0.4s, v2.4s
|
||||
; CHECK-NEXT: sub v0.4s, v0.4s, v3.4s
|
||||
; CHECK-NEXT: add v0.4s, v0.4s, v2.4s
|
||||
; CHECK-NEXT: ret
|
||||
%t0 = add <4 x i32> %a, %b
|
||||
%t1 = sub <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46>
|
||||
@ -232,8 +232,8 @@ define <4 x i32> @vec_sink_sub_of_const_to_add1(<4 x i32> %a, <4 x i32> %b, <4 x
|
||||
; CHECK-NEXT: adrp x8, .LCPI15_0
|
||||
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI15_0]
|
||||
; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
|
||||
; CHECK-NEXT: add v0.4s, v0.4s, v2.4s
|
||||
; CHECK-NEXT: sub v0.4s, v0.4s, v3.4s
|
||||
; CHECK-NEXT: add v0.4s, v2.4s, v0.4s
|
||||
; CHECK-NEXT: ret
|
||||
%t0 = add <4 x i32> %a, %b
|
||||
%t1 = sub <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46>
|
||||
@ -282,8 +282,8 @@ define <4 x i32> @vec_sink_add_of_const_to_sub(<4 x i32> %a, <4 x i32> %b, <4 x
|
||||
; CHECK-NEXT: adrp x8, .LCPI18_0
|
||||
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI18_0]
|
||||
; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
|
||||
; CHECK-NEXT: sub v0.4s, v0.4s, v2.4s
|
||||
; CHECK-NEXT: add v0.4s, v0.4s, v3.4s
|
||||
; CHECK-NEXT: sub v0.4s, v0.4s, v2.4s
|
||||
; CHECK-NEXT: ret
|
||||
%t0 = sub <4 x i32> %a, %b
|
||||
%t1 = add <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46> ; constant always on RHS
|
||||
@ -295,9 +295,9 @@ define <4 x i32> @vec_sink_add_of_const_to_sub2(<4 x i32> %a, <4 x i32> %b, <4 x
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: adrp x8, .LCPI19_0
|
||||
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI19_0]
|
||||
; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s
|
||||
; CHECK-NEXT: add v0.4s, v2.4s, v0.4s
|
||||
; CHECK-NEXT: sub v0.4s, v0.4s, v3.4s
|
||||
; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
|
||||
; CHECK-NEXT: add v0.4s, v0.4s, v3.4s
|
||||
; CHECK-NEXT: sub v0.4s, v2.4s, v0.4s
|
||||
; CHECK-NEXT: ret
|
||||
%t0 = sub <4 x i32> %a, %b
|
||||
%t1 = add <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46> ; constant always on RHS
|
||||
@ -314,8 +314,8 @@ define <4 x i32> @vec_sink_sub_of_const_to_sub(<4 x i32> %a, <4 x i32> %b, <4 x
|
||||
; CHECK-NEXT: adrp x8, .LCPI20_0
|
||||
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI20_0]
|
||||
; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
|
||||
; CHECK-NEXT: sub v0.4s, v0.4s, v2.4s
|
||||
; CHECK-NEXT: sub v0.4s, v0.4s, v3.4s
|
||||
; CHECK-NEXT: sub v0.4s, v0.4s, v2.4s
|
||||
; CHECK-NEXT: ret
|
||||
%t0 = sub <4 x i32> %a, %b
|
||||
%t1 = sub <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46>
|
||||
@ -346,8 +346,8 @@ define <4 x i32> @vec_sink_sub_from_const_to_sub(<4 x i32> %a, <4 x i32> %b, <4
|
||||
; CHECK-NEXT: adrp x8, .LCPI22_0
|
||||
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI22_0]
|
||||
; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s
|
||||
; CHECK-NEXT: sub v0.4s, v0.4s, v2.4s
|
||||
; CHECK-NEXT: add v0.4s, v0.4s, v3.4s
|
||||
; CHECK-NEXT: sub v0.4s, v0.4s, v2.4s
|
||||
; CHECK-NEXT: ret
|
||||
%t0 = sub <4 x i32> %a, %b
|
||||
%t1 = sub <4 x i32> <i32 42, i32 24, i32 undef, i32 46>, %t0
|
||||
@ -360,8 +360,8 @@ define <4 x i32> @vec_sink_sub_from_const_to_sub2(<4 x i32> %a, <4 x i32> %b, <4
|
||||
; CHECK-NEXT: adrp x8, .LCPI23_0
|
||||
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI23_0]
|
||||
; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
|
||||
; CHECK-NEXT: add v0.4s, v0.4s, v2.4s
|
||||
; CHECK-NEXT: sub v0.4s, v0.4s, v3.4s
|
||||
; CHECK-NEXT: add v0.4s, v2.4s, v0.4s
|
||||
; CHECK-NEXT: ret
|
||||
%t0 = sub <4 x i32> %a, %b
|
||||
%t1 = sub <4 x i32> <i32 42, i32 24, i32 undef, i32 46>, %t0
|
||||
|
@ -18,8 +18,8 @@ define i32 @PR39657(i8* %p, i64 %x) {
|
||||
define i32 @add_of_not(i32 %x, i32 %y) {
|
||||
; CHECK-LABEL: add_of_not:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mvn w8, w1
|
||||
; CHECK-NEXT: add w0, w8, w0
|
||||
; CHECK-NEXT: sub w8, w0, w1
|
||||
; CHECK-NEXT: sub w0, w8, #1 // =1
|
||||
; CHECK-NEXT: ret
|
||||
%t0 = sub i32 %x, %y
|
||||
%r = add i32 %t0, -1
|
||||
@ -29,8 +29,8 @@ define i32 @add_of_not(i32 %x, i32 %y) {
|
||||
define i32 @add_of_not_decrement(i32 %x, i32 %y) {
|
||||
; CHECK-LABEL: add_of_not_decrement:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mvn w8, w1
|
||||
; CHECK-NEXT: add w0, w8, w0
|
||||
; CHECK-NEXT: sub w8, w0, w1
|
||||
; CHECK-NEXT: sub w0, w8, #1 // =1
|
||||
; CHECK-NEXT: ret
|
||||
%t0 = sub i32 %x, %y
|
||||
%r = sub i32 %t0, 1
|
||||
@ -40,8 +40,9 @@ define i32 @add_of_not_decrement(i32 %x, i32 %y) {
|
||||
define <4 x i32> @vec_add_of_not(<4 x i32> %x, <4 x i32> %y) {
|
||||
; CHECK-LABEL: vec_add_of_not:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mvn v1.16b, v1.16b
|
||||
; CHECK-NEXT: add v0.4s, v1.4s, v0.4s
|
||||
; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
|
||||
; CHECK-NEXT: movi v1.2d, #0xffffffffffffffff
|
||||
; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
|
||||
; CHECK-NEXT: ret
|
||||
%t0 = sub <4 x i32> %x, %y
|
||||
%r = add <4 x i32> %t0, <i32 -1, i32 -1, i32 -1, i32 -1>
|
||||
@ -51,8 +52,9 @@ define <4 x i32> @vec_add_of_not(<4 x i32> %x, <4 x i32> %y) {
|
||||
define <4 x i32> @vec_add_of_not_decrement(<4 x i32> %x, <4 x i32> %y) {
|
||||
; CHECK-LABEL: vec_add_of_not_decrement:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mvn v1.16b, v1.16b
|
||||
; CHECK-NEXT: add v0.4s, v1.4s, v0.4s
|
||||
; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
|
||||
; CHECK-NEXT: movi v1.4s, #1
|
||||
; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
|
||||
; CHECK-NEXT: ret
|
||||
%t0 = sub <4 x i32> %x, %y
|
||||
%r = sub <4 x i32> %t0, <i32 1, i32 1, i32 1, i32 1>
|
||||
|
@ -9,16 +9,16 @@ define amdgpu_kernel void @test_barrier(i32 addrspace(1)* %out, i32 %size) #0 {
|
||||
; VARIANT0: ; %bb.0: ; %entry
|
||||
; VARIANT0-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
|
||||
; VARIANT0-NEXT: s_load_dword s2, s[0:1], 0xb
|
||||
; VARIANT0-NEXT: v_not_b32_e32 v3, v0
|
||||
; VARIANT0-NEXT: s_mov_b32 s7, 0xf000
|
||||
; VARIANT0-NEXT: s_mov_b32 s6, 0
|
||||
; VARIANT0-NEXT: v_lshlrev_b32_e32 v1, 2, v0
|
||||
; VARIANT0-NEXT: v_mov_b32_e32 v2, 0
|
||||
; VARIANT0-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; VARIANT0-NEXT: buffer_store_dword v0, v[1:2], s[4:7], 0 addr64
|
||||
; VARIANT0-NEXT: v_add_i32_e32 v3, vcc, s2, v3
|
||||
; VARIANT0-NEXT: s_add_i32 s2, s2, -1
|
||||
; VARIANT0-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
||||
; VARIANT0-NEXT: s_barrier
|
||||
; VARIANT0-NEXT: v_sub_i32_e32 v3, vcc, s2, v0
|
||||
; VARIANT0-NEXT: v_ashrrev_i32_e32 v4, 31, v3
|
||||
; VARIANT0-NEXT: v_lshl_b64 v[3:4], v[3:4], 2
|
||||
; VARIANT0-NEXT: buffer_load_dword v0, v[3:4], s[4:7], 0 addr64
|
||||
@ -30,15 +30,15 @@ define amdgpu_kernel void @test_barrier(i32 addrspace(1)* %out, i32 %size) #0 {
|
||||
; VARIANT1: ; %bb.0: ; %entry
|
||||
; VARIANT1-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
|
||||
; VARIANT1-NEXT: s_load_dword s2, s[0:1], 0xb
|
||||
; VARIANT1-NEXT: v_not_b32_e32 v3, v0
|
||||
; VARIANT1-NEXT: s_mov_b32 s7, 0xf000
|
||||
; VARIANT1-NEXT: s_mov_b32 s6, 0
|
||||
; VARIANT1-NEXT: v_lshlrev_b32_e32 v1, 2, v0
|
||||
; VARIANT1-NEXT: v_mov_b32_e32 v2, 0
|
||||
; VARIANT1-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; VARIANT1-NEXT: buffer_store_dword v0, v[1:2], s[4:7], 0 addr64
|
||||
; VARIANT1-NEXT: v_add_i32_e32 v3, vcc, s2, v3
|
||||
; VARIANT1-NEXT: s_add_i32 s2, s2, -1
|
||||
; VARIANT1-NEXT: s_barrier
|
||||
; VARIANT1-NEXT: v_sub_i32_e32 v3, vcc, s2, v0
|
||||
; VARIANT1-NEXT: v_ashrrev_i32_e32 v4, 31, v3
|
||||
; VARIANT1-NEXT: v_lshl_b64 v[3:4], v[3:4], 2
|
||||
; VARIANT1-NEXT: s_waitcnt expcnt(0)
|
||||
@ -59,7 +59,8 @@ define amdgpu_kernel void @test_barrier(i32 addrspace(1)* %out, i32 %size) #0 {
|
||||
; VARIANT2-NEXT: global_store_dword v[1:2], v0, off
|
||||
; VARIANT2-NEXT: s_waitcnt vmcnt(0)
|
||||
; VARIANT2-NEXT: s_barrier
|
||||
; VARIANT2-NEXT: v_xad_u32 v3, v0, -1, s0
|
||||
; VARIANT2-NEXT: s_add_i32 s0, s0, -1
|
||||
; VARIANT2-NEXT: v_sub_u32_e32 v3, s0, v0
|
||||
; VARIANT2-NEXT: v_ashrrev_i32_e32 v4, 31, v3
|
||||
; VARIANT2-NEXT: v_lshlrev_b64 v[3:4], 2, v[3:4]
|
||||
; VARIANT2-NEXT: v_mov_b32_e32 v0, s3
|
||||
@ -81,7 +82,8 @@ define amdgpu_kernel void @test_barrier(i32 addrspace(1)* %out, i32 %size) #0 {
|
||||
; VARIANT3-NEXT: v_addc_co_u32_e32 v2, vcc, 0, v2, vcc
|
||||
; VARIANT3-NEXT: global_store_dword v[1:2], v0, off
|
||||
; VARIANT3-NEXT: s_barrier
|
||||
; VARIANT3-NEXT: v_xad_u32 v3, v0, -1, s0
|
||||
; VARIANT3-NEXT: s_add_i32 s0, s0, -1
|
||||
; VARIANT3-NEXT: v_sub_u32_e32 v3, s0, v0
|
||||
; VARIANT3-NEXT: v_ashrrev_i32_e32 v4, 31, v3
|
||||
; VARIANT3-NEXT: v_lshlrev_b64 v[3:4], 2, v[3:4]
|
||||
; VARIANT3-NEXT: v_mov_b32_e32 v0, s3
|
||||
|
@ -15,9 +15,10 @@ define void @variable_alloca_with_adj_call_stack(i32 %num) {
|
||||
; V8-NEXT: .cfi_register 15, 31
|
||||
; V8-NEXT: add %i0, 7, %i0
|
||||
; V8-NEXT: and %i0, -8, %i0
|
||||
; V8-NEXT: add %i0, 8, %i0
|
||||
; V8-NEXT: sub %sp, %i0, %i0
|
||||
; V8-NEXT: add %i0, -8, %sp
|
||||
; V8-NEXT: add %i0, 88, %o0
|
||||
; V8-NEXT: add %i0, 96, %o0
|
||||
; V8-NEXT: mov %i0, %sp
|
||||
; V8-NEXT: add %sp, -16, %sp
|
||||
; V8-NEXT: st %o0, [%sp+104]
|
||||
; V8-NEXT: st %o0, [%sp+100]
|
||||
|
@ -75,12 +75,13 @@ define void @f3(i64 %len) {
|
||||
; CHECK-NEXT: lgr %r11, %r15
|
||||
; CHECK-NEXT: .cfi_def_cfa_register %r11
|
||||
; CHECK-NEXT: lgr %r1, %r15
|
||||
; CHECK-NEXT: sllg %r0, %r2, 3
|
||||
; CHECK-NEXT: sllg %r2, %r2, 3
|
||||
; CHECK-NEXT: la %r0, 120(%r2)
|
||||
; CHECK-NEXT: sgr %r1, %r0
|
||||
; CHECK-NEXT: lay %r15, -120(%r1)
|
||||
; CHECK-NEXT: la %r1, 160(%r1)
|
||||
; CHECK-NEXT: nill %r1, 65408
|
||||
; CHECK-NEXT: mvghi 0(%r1), 10
|
||||
; CHECK-NEXT: la %r2, 280(%r1)
|
||||
; CHECK-NEXT: nill %r2, 65408
|
||||
; CHECK-NEXT: lgr %r15, %r1
|
||||
; CHECK-NEXT: mvghi 0(%r2), 10
|
||||
; CHECK-NEXT: lmg %r11, %r15, 248(%r11)
|
||||
; CHECK-NEXT: br %r14
|
||||
%x = alloca i64, i64 %len, align 128
|
||||
|
@ -210,16 +210,16 @@ define <4 x i32> @combine_vec_add_sub_add3(<4 x i32> %a, <4 x i32> %b, <4 x i32>
|
||||
define <4 x i32> @combine_vec_add_sub_sub(<4 x i32> %a, <4 x i32> %b, <4 x i32> %d) {
|
||||
; SSE-LABEL: combine_vec_add_sub_sub:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: paddd {{.*}}(%rip), %xmm0
|
||||
; SSE-NEXT: paddd %xmm2, %xmm1
|
||||
; SSE-NEXT: psubd %xmm1, %xmm0
|
||||
; SSE-NEXT: paddd {{.*}}(%rip), %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: combine_vec_add_sub_sub:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX-NEXT: vpaddd %xmm2, %xmm1, %xmm1
|
||||
; AVX-NEXT: vpsubd %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
%1 = sub <4 x i32> %a, %b
|
||||
%2 = sub <4 x i32> <i32 0, i32 1, i32 2, i32 3>, %d
|
||||
|
@ -67,8 +67,8 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
|
||||
; CHECK-NEXT: testb %al, %al
|
||||
; CHECK-NEXT: je LBB0_55
|
||||
; CHECK-NEXT: ## %bb.6: ## %SyTime.exit2720
|
||||
; CHECK-NEXT: movq %rdx, %rbx
|
||||
; CHECK-NEXT: movq %rdi, %rbp
|
||||
; CHECK-NEXT: movq %rdx, %r14
|
||||
; CHECK-NEXT: movq %rdi, %r15
|
||||
; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rax
|
||||
; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
|
||||
; CHECK-NEXT: cmpq %rax, %rcx
|
||||
@ -78,11 +78,10 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
|
||||
; CHECK-NEXT: movl $32, %esi
|
||||
; CHECK-NEXT: callq _memset
|
||||
; CHECK-NEXT: LBB0_8: ## %while.body.preheader
|
||||
; CHECK-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
|
||||
; CHECK-NEXT: imulq $1040, %rbx, %rax ## imm = 0x410
|
||||
; CHECK-NEXT: imulq $1040, %r14, %rax ## imm = 0x410
|
||||
; CHECK-NEXT: movq _syBuf@{{.*}}(%rip), %rcx
|
||||
; CHECK-NEXT: leaq 8(%rcx,%rax), %rbx
|
||||
; CHECK-NEXT: movl $1, %r15d
|
||||
; CHECK-NEXT: movl $1, %r14d
|
||||
; CHECK-NEXT: movq _syCTRO@{{.*}}(%rip), %rax
|
||||
; CHECK-NEXT: movb $1, %cl
|
||||
; CHECK-NEXT: .p2align 4, 0x90
|
||||
@ -92,47 +91,48 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
|
||||
; CHECK-NEXT: testb %cl, %cl
|
||||
; CHECK-NEXT: jne LBB0_9
|
||||
; CHECK-NEXT: ## %bb.10: ## %do.end
|
||||
; CHECK-NEXT: xorl %r14d, %r14d
|
||||
; CHECK-NEXT: testb %r14b, %r14b
|
||||
; CHECK-NEXT: xorl %r12d, %r12d
|
||||
; CHECK-NEXT: testb %r12b, %r12b
|
||||
; CHECK-NEXT: jne LBB0_11
|
||||
; CHECK-NEXT: ## %bb.12: ## %while.body200.preheader
|
||||
; CHECK-NEXT: xorl %edx, %edx
|
||||
; CHECK-NEXT: leaq {{.*}}(%rip), %rsi
|
||||
; CHECK-NEXT: leaq {{.*}}(%rip), %rdi
|
||||
; CHECK-NEXT: xorl %ebp, %ebp
|
||||
; CHECK-NEXT: xorl %r13d, %r13d
|
||||
; CHECK-NEXT: xorl %ecx, %ecx
|
||||
; CHECK-NEXT: jmp LBB0_13
|
||||
; CHECK-NEXT: .p2align 4, 0x90
|
||||
; CHECK-NEXT: LBB0_14: ## %while.body200
|
||||
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
|
||||
; CHECK-NEXT: leal 1(%r14), %eax
|
||||
; CHECK-NEXT: leal 1(%r12), %eax
|
||||
; CHECK-NEXT: cmpl $21, %eax
|
||||
; CHECK-NEXT: ja LBB0_20
|
||||
; CHECK-NEXT: ## %bb.15: ## %while.body200
|
||||
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
|
||||
; CHECK-NEXT: movl $-1, %r13d
|
||||
; CHECK-NEXT: movl $-1, %ecx
|
||||
; CHECK-NEXT: movslq (%rsi,%rax,4), %rax
|
||||
; CHECK-NEXT: addq %rsi, %rax
|
||||
; CHECK-NEXT: jmpq *%rax
|
||||
; CHECK-NEXT: LBB0_18: ## %while.cond201.preheader
|
||||
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
|
||||
; CHECK-NEXT: movl $1, %r13d
|
||||
; CHECK-NEXT: movl $1, %ecx
|
||||
; CHECK-NEXT: jmp LBB0_21
|
||||
; CHECK-NEXT: LBB0_44: ## %while.cond1037.preheader
|
||||
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
|
||||
; CHECK-NEXT: testb %dl, %dl
|
||||
; CHECK-NEXT: movl %r14d, %r13d
|
||||
; CHECK-NEXT: movl %r12d, %ecx
|
||||
; CHECK-NEXT: jne LBB0_21
|
||||
; CHECK-NEXT: jmp LBB0_55
|
||||
; CHECK-NEXT: LBB0_26: ## %sw.bb474
|
||||
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
|
||||
; CHECK-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
|
||||
; CHECK-NEXT: testb %dl, %dl
|
||||
; CHECK-NEXT: ## implicit-def: $r12
|
||||
; CHECK-NEXT: ## implicit-def: $r13
|
||||
; CHECK-NEXT: jne LBB0_34
|
||||
; CHECK-NEXT: ## %bb.27: ## %do.body479.preheader
|
||||
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
|
||||
; CHECK-NEXT: testb %dl, %dl
|
||||
; CHECK-NEXT: ## implicit-def: $r12
|
||||
; CHECK-NEXT: ## implicit-def: $r13
|
||||
; CHECK-NEXT: jne LBB0_34
|
||||
; CHECK-NEXT: ## %bb.28: ## %land.rhs485.preheader
|
||||
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
|
||||
@ -145,7 +145,7 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
|
||||
; CHECK-NEXT: js LBB0_55
|
||||
; CHECK-NEXT: ## %bb.30: ## %cond.true.i.i2780
|
||||
; CHECK-NEXT: ## in Loop: Header=BB0_29 Depth=2
|
||||
; CHECK-NEXT: movq %rax, %r12
|
||||
; CHECK-NEXT: movq %rax, %r13
|
||||
; CHECK-NEXT: testb %dl, %dl
|
||||
; CHECK-NEXT: jne LBB0_32
|
||||
; CHECK-NEXT: ## %bb.31: ## %lor.rhs500
|
||||
@ -157,15 +157,16 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
|
||||
; CHECK-NEXT: je LBB0_34
|
||||
; CHECK-NEXT: LBB0_32: ## %do.body479.backedge
|
||||
; CHECK-NEXT: ## in Loop: Header=BB0_29 Depth=2
|
||||
; CHECK-NEXT: leaq 1(%r12), %rax
|
||||
; CHECK-NEXT: leaq 1(%r13), %rax
|
||||
; CHECK-NEXT: testb %dl, %dl
|
||||
; CHECK-NEXT: jne LBB0_29
|
||||
; CHECK-NEXT: ## %bb.33: ## %if.end517.loopexitsplit
|
||||
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
|
||||
; CHECK-NEXT: incq %r12
|
||||
; CHECK-NEXT: incq %r13
|
||||
; CHECK-NEXT: LBB0_34: ## %if.end517
|
||||
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
|
||||
; CHECK-NEXT: leal -324(%r13), %eax
|
||||
; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax ## 8-byte Reload
|
||||
; CHECK-NEXT: addl $-324, %eax ## imm = 0xFEBC
|
||||
; CHECK-NEXT: cmpl $59, %eax
|
||||
; CHECK-NEXT: ja LBB0_35
|
||||
; CHECK-NEXT: ## %bb.57: ## %if.end517
|
||||
@ -175,11 +176,11 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
|
||||
; CHECK-NEXT: jb LBB0_38
|
||||
; CHECK-NEXT: LBB0_35: ## %if.end517
|
||||
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
|
||||
; CHECK-NEXT: cmpl $11, %r13d
|
||||
; CHECK-NEXT: cmpl $11, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Reload
|
||||
; CHECK-NEXT: je LBB0_38
|
||||
; CHECK-NEXT: ## %bb.36: ## %if.end517
|
||||
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
|
||||
; CHECK-NEXT: cmpl $24, %r13d
|
||||
; CHECK-NEXT: cmpl $24, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Reload
|
||||
; CHECK-NEXT: je LBB0_38
|
||||
; CHECK-NEXT: ## %bb.37: ## %if.then532
|
||||
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
|
||||
@ -194,8 +195,8 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
|
||||
; CHECK-NEXT: ## %bb.39: ## %for.cond542.preheader
|
||||
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
|
||||
; CHECK-NEXT: testb %dl, %dl
|
||||
; CHECK-NEXT: movb $0, (%r12)
|
||||
; CHECK-NEXT: movl %r14d, %r13d
|
||||
; CHECK-NEXT: movb $0, (%r13)
|
||||
; CHECK-NEXT: movl %r12d, %ecx
|
||||
; CHECK-NEXT: leaq {{.*}}(%rip), %rsi
|
||||
; CHECK-NEXT: leaq {{.*}}(%rip), %rdi
|
||||
; CHECK-NEXT: jmp LBB0_21
|
||||
@ -207,22 +208,22 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
|
||||
; CHECK-NEXT: jb LBB0_55
|
||||
; CHECK-NEXT: ## %bb.46: ## in Loop: Header=BB0_13 Depth=1
|
||||
; CHECK-NEXT: xorl %ebp, %ebp
|
||||
; CHECK-NEXT: movl $268, %r13d ## imm = 0x10C
|
||||
; CHECK-NEXT: movl $268, %ecx ## imm = 0x10C
|
||||
; CHECK-NEXT: jmp LBB0_21
|
||||
; CHECK-NEXT: LBB0_19: ## %sw.bb243
|
||||
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
|
||||
; CHECK-NEXT: movl $2, %r13d
|
||||
; CHECK-NEXT: movl $2, %ecx
|
||||
; CHECK-NEXT: jmp LBB0_21
|
||||
; CHECK-NEXT: LBB0_40: ## %sw.bb566
|
||||
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
|
||||
; CHECK-NEXT: movl $20, %r13d
|
||||
; CHECK-NEXT: movl $20, %ecx
|
||||
; CHECK-NEXT: jmp LBB0_21
|
||||
; CHECK-NEXT: .p2align 4, 0x90
|
||||
; CHECK-NEXT: LBB0_13: ## %while.body200
|
||||
; CHECK-NEXT: ## =>This Loop Header: Depth=1
|
||||
; CHECK-NEXT: ## Child Loop BB0_29 Depth 2
|
||||
; CHECK-NEXT: ## Child Loop BB0_38 Depth 2
|
||||
; CHECK-NEXT: leal -268(%r14), %eax
|
||||
; CHECK-NEXT: leal -268(%r12), %eax
|
||||
; CHECK-NEXT: cmpl $105, %eax
|
||||
; CHECK-NEXT: ja LBB0_14
|
||||
; CHECK-NEXT: ## %bb.56: ## %while.body200
|
||||
@ -232,12 +233,12 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
|
||||
; CHECK-NEXT: jmpq *%rax
|
||||
; CHECK-NEXT: LBB0_20: ## %sw.bb256
|
||||
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
|
||||
; CHECK-NEXT: movl %r14d, %r13d
|
||||
; CHECK-NEXT: movl %r12d, %ecx
|
||||
; CHECK-NEXT: LBB0_21: ## %while.cond197.backedge
|
||||
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
|
||||
; CHECK-NEXT: decl %r15d
|
||||
; CHECK-NEXT: testl %r15d, %r15d
|
||||
; CHECK-NEXT: movl %r13d, %r14d
|
||||
; CHECK-NEXT: decl %r14d
|
||||
; CHECK-NEXT: testl %r14d, %r14d
|
||||
; CHECK-NEXT: movl %ecx, %r12d
|
||||
; CHECK-NEXT: jg LBB0_13
|
||||
; CHECK-NEXT: jmp LBB0_22
|
||||
; CHECK-NEXT: .p2align 4, 0x90
|
||||
@ -254,28 +255,27 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
|
||||
; CHECK-NEXT: jmp LBB0_25
|
||||
; CHECK-NEXT: LBB0_11:
|
||||
; CHECK-NEXT: xorl %ebp, %ebp
|
||||
; CHECK-NEXT: xorl %r13d, %r13d
|
||||
; CHECK-NEXT: xorl %ecx, %ecx
|
||||
; CHECK-NEXT: LBB0_22: ## %while.end1465
|
||||
; CHECK-NEXT: incl %r13d
|
||||
; CHECK-NEXT: cmpl $16, %r13d
|
||||
; CHECK-NEXT: incl %ecx
|
||||
; CHECK-NEXT: cmpl $16, %ecx
|
||||
; CHECK-NEXT: ja LBB0_50
|
||||
; CHECK-NEXT: ## %bb.23: ## %while.end1465
|
||||
; CHECK-NEXT: movl $83969, %eax ## imm = 0x14801
|
||||
; CHECK-NEXT: btl %r13d, %eax
|
||||
; CHECK-NEXT: btl %ecx, %eax
|
||||
; CHECK-NEXT: jae LBB0_50
|
||||
; CHECK-NEXT: ## %bb.24:
|
||||
; CHECK-NEXT: xorl %ebp, %ebp
|
||||
; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx ## 8-byte Reload
|
||||
; CHECK-NEXT: xorl %ebx, %ebx
|
||||
; CHECK-NEXT: LBB0_48: ## %if.then1477
|
||||
; CHECK-NEXT: movl $1, %edx
|
||||
; CHECK-NEXT: callq _write
|
||||
; CHECK-NEXT: subq %rbp, %rbx
|
||||
; CHECK-NEXT: movq _syHistory@{{.*}}(%rip), %rax
|
||||
; CHECK-NEXT: leaq 8189(%rbx,%rax), %rax
|
||||
; CHECK-NEXT: addq $8189, %r15 ## imm = 0x1FFD
|
||||
; CHECK-NEXT: subq %rbx, %r15
|
||||
; CHECK-NEXT: addq _syHistory@{{.*}}(%rip), %r15
|
||||
; CHECK-NEXT: .p2align 4, 0x90
|
||||
; CHECK-NEXT: LBB0_49: ## %for.body1723
|
||||
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: decq %rax
|
||||
; CHECK-NEXT: decq %r15
|
||||
; CHECK-NEXT: jmp LBB0_49
|
||||
; CHECK-NEXT: LBB0_50: ## %for.cond1480.preheader
|
||||
; CHECK-NEXT: movl $512, %eax ## imm = 0x200
|
||||
@ -302,8 +302,7 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
|
||||
; CHECK-NEXT: LBB0_55: ## %if.then.i
|
||||
; CHECK-NEXT: ud2
|
||||
; CHECK-NEXT: LBB0_47: ## %if.then1477.loopexit
|
||||
; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx ## 8-byte Reload
|
||||
; CHECK-NEXT: movq %rbx, %rbp
|
||||
; CHECK-NEXT: movq %r15, %rbx
|
||||
; CHECK-NEXT: jmp LBB0_48
|
||||
; CHECK-NEXT: LBB0_16: ## %while.cond635.preheader
|
||||
; CHECK-NEXT: xorl %eax, %eax
|
||||
|
@ -1111,7 +1111,7 @@ define i32 @reg32_lshr_by_negated_unfolded_sub_b(i32 %val, i32 %a, i32 %b) nounw
|
||||
; X32-LABEL: reg32_lshr_by_negated_unfolded_sub_b:
|
||||
; X32: # %bb.0:
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: xorl %ecx, %ecx
|
||||
; X32-NEXT: movl $32, %ecx
|
||||
; X32-NEXT: subl {{[0-9]+}}(%esp), %ecx
|
||||
; X32-NEXT: subl {{[0-9]+}}(%esp), %ecx
|
||||
; X32-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
@ -1120,9 +1120,9 @@ define i32 @reg32_lshr_by_negated_unfolded_sub_b(i32 %val, i32 %a, i32 %b) nounw
|
||||
;
|
||||
; X64-LABEL: reg32_lshr_by_negated_unfolded_sub_b:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: movl %esi, %ecx
|
||||
; X64-NEXT: movl %edi, %eax
|
||||
; X64-NEXT: negl %ecx
|
||||
; X64-NEXT: movl $32, %ecx
|
||||
; X64-NEXT: subl %esi, %ecx
|
||||
; X64-NEXT: subl %edx, %ecx
|
||||
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; X64-NEXT: shrl %cl, %eax
|
||||
@ -1139,10 +1139,9 @@ define i64 @reg64_lshr_by_negated_unfolded_sub_b(i64 %val, i64 %a, i64 %b) nounw
|
||||
; X32-NEXT: pushl %esi
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
|
||||
; X32-NEXT: xorl %ecx, %ecx
|
||||
; X32-NEXT: movl $64, %ecx
|
||||
; X32-NEXT: subl {{[0-9]+}}(%esp), %ecx
|
||||
; X32-NEXT: subl {{[0-9]+}}(%esp), %ecx
|
||||
; X32-NEXT: addb $64, %cl
|
||||
; X32-NEXT: movl %esi, %edx
|
||||
; X32-NEXT: shrl %cl, %edx
|
||||
; X32-NEXT: shrdl %cl, %esi, %eax
|
||||
|
@ -156,16 +156,16 @@ define i32 @sink_add_of_const_to_sub(i32 %a, i32 %b, i32 %c) {
|
||||
; X32: # %bb.0:
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: subl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: subl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: addl $32, %eax
|
||||
; X32-NEXT: subl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: sink_add_of_const_to_sub:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: # kill: def $edi killed $edi def $rdi
|
||||
; X64-NEXT: subl %esi, %edi
|
||||
; X64-NEXT: subl %edx, %edi
|
||||
; X64-NEXT: leal 32(%rdi), %eax
|
||||
; X64-NEXT: subl %edx, %eax
|
||||
; X64-NEXT: retq
|
||||
%t0 = sub i32 %a, %b
|
||||
%t1 = add i32 %t0, 32 ; constant always on RHS
|
||||
@ -178,16 +178,16 @@ define i32 @sink_add_of_const_to_sub2(i32 %a, i32 %b, i32 %c) {
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X32-NEXT: subl {{[0-9]+}}(%esp), %ecx
|
||||
; X32-NEXT: addl %ecx, %eax
|
||||
; X32-NEXT: addl $-32, %eax
|
||||
; X32-NEXT: addl $32, %ecx
|
||||
; X32-NEXT: subl %ecx, %eax
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: sink_add_of_const_to_sub2:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: # kill: def $edx killed $edx def $rdx
|
||||
; X64-NEXT: # kill: def $esi killed $esi def $rsi
|
||||
; X64-NEXT: subl %edi, %esi
|
||||
; X64-NEXT: leal -32(%rdx,%rsi), %eax
|
||||
; X64-NEXT: movl %edx, %eax
|
||||
; X64-NEXT: subl %esi, %edi
|
||||
; X64-NEXT: addl $32, %edi
|
||||
; X64-NEXT: subl %edi, %eax
|
||||
; X64-NEXT: retq
|
||||
%t0 = sub i32 %a, %b
|
||||
%t1 = add i32 %t0, 32 ; constant always on RHS
|
||||
@ -203,16 +203,16 @@ define i32 @sink_sub_of_const_to_sub(i32 %a, i32 %b, i32 %c) {
|
||||
; X32: # %bb.0:
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: subl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: subl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: addl $-32, %eax
|
||||
; X32-NEXT: subl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: sink_sub_of_const_to_sub:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: # kill: def $edi killed $edi def $rdi
|
||||
; X64-NEXT: subl %esi, %edi
|
||||
; X64-NEXT: subl %edx, %edi
|
||||
; X64-NEXT: leal -32(%rdi), %eax
|
||||
; X64-NEXT: subl %edx, %eax
|
||||
; X64-NEXT: retq
|
||||
%t0 = sub i32 %a, %b
|
||||
%t1 = sub i32 %t0, 32
|
||||
@ -250,16 +250,16 @@ define i32 @sink_sub_from_const_to_sub(i32 %a, i32 %b, i32 %c) {
|
||||
; X32: # %bb.0:
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: subl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: subl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: addl $32, %eax
|
||||
; X32-NEXT: subl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: sink_sub_from_const_to_sub:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: # kill: def $esi killed $esi def $rsi
|
||||
; X64-NEXT: subl %edi, %esi
|
||||
; X64-NEXT: subl %edx, %esi
|
||||
; X64-NEXT: leal 32(%rsi), %eax
|
||||
; X64-NEXT: subl %edx, %eax
|
||||
; X64-NEXT: retq
|
||||
%t0 = sub i32 %a, %b
|
||||
%t1 = sub i32 32, %t0
|
||||
@ -341,16 +341,16 @@ define <4 x i32> @vec_sink_add_of_const_to_add1(<4 x i32> %a, <4 x i32> %b, <4 x
|
||||
define <4 x i32> @vec_sink_sub_of_const_to_add0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
|
||||
; X32-LABEL: vec_sink_sub_of_const_to_add0:
|
||||
; X32: # %bb.0:
|
||||
; X32-NEXT: paddd %xmm2, %xmm1
|
||||
; X32-NEXT: paddd %xmm1, %xmm0
|
||||
; X32-NEXT: psubd {{\.LCPI.*}}, %xmm0
|
||||
; X32-NEXT: paddd %xmm2, %xmm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: vec_sink_sub_of_const_to_add0:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: paddd %xmm2, %xmm1
|
||||
; X64-NEXT: paddd %xmm1, %xmm0
|
||||
; X64-NEXT: psubd {{.*}}(%rip), %xmm0
|
||||
; X64-NEXT: paddd %xmm2, %xmm0
|
||||
; X64-NEXT: retq
|
||||
%t0 = add <4 x i32> %a, %b
|
||||
%t1 = sub <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46>
|
||||
@ -360,16 +360,16 @@ define <4 x i32> @vec_sink_sub_of_const_to_add0(<4 x i32> %a, <4 x i32> %b, <4 x
|
||||
define <4 x i32> @vec_sink_sub_of_const_to_add1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
|
||||
; X32-LABEL: vec_sink_sub_of_const_to_add1:
|
||||
; X32: # %bb.0:
|
||||
; X32-NEXT: paddd %xmm2, %xmm1
|
||||
; X32-NEXT: paddd %xmm1, %xmm0
|
||||
; X32-NEXT: psubd {{\.LCPI.*}}, %xmm0
|
||||
; X32-NEXT: paddd %xmm2, %xmm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: vec_sink_sub_of_const_to_add1:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: paddd %xmm2, %xmm1
|
||||
; X64-NEXT: paddd %xmm1, %xmm0
|
||||
; X64-NEXT: psubd {{.*}}(%rip), %xmm0
|
||||
; X64-NEXT: paddd %xmm2, %xmm0
|
||||
; X64-NEXT: retq
|
||||
%t0 = add <4 x i32> %a, %b
|
||||
%t1 = sub <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46>
|
||||
@ -416,15 +416,15 @@ define <4 x i32> @vec_sink_add_of_const_to_sub(<4 x i32> %a, <4 x i32> %b, <4 x
|
||||
; X32-LABEL: vec_sink_add_of_const_to_sub:
|
||||
; X32: # %bb.0:
|
||||
; X32-NEXT: psubd %xmm1, %xmm0
|
||||
; X32-NEXT: psubd %xmm2, %xmm0
|
||||
; X32-NEXT: paddd {{\.LCPI.*}}, %xmm0
|
||||
; X32-NEXT: psubd %xmm2, %xmm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: vec_sink_add_of_const_to_sub:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: psubd %xmm1, %xmm0
|
||||
; X64-NEXT: psubd %xmm2, %xmm0
|
||||
; X64-NEXT: paddd {{.*}}(%rip), %xmm0
|
||||
; X64-NEXT: psubd %xmm2, %xmm0
|
||||
; X64-NEXT: retq
|
||||
%t0 = sub <4 x i32> %a, %b
|
||||
%t1 = add <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46> ; constant always on RHS
|
||||
@ -434,18 +434,18 @@ define <4 x i32> @vec_sink_add_of_const_to_sub(<4 x i32> %a, <4 x i32> %b, <4 x
|
||||
define <4 x i32> @vec_sink_add_of_const_to_sub2(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
|
||||
; X32-LABEL: vec_sink_add_of_const_to_sub2:
|
||||
; X32: # %bb.0:
|
||||
; X32-NEXT: psubd %xmm0, %xmm1
|
||||
; X32-NEXT: paddd %xmm2, %xmm1
|
||||
; X32-NEXT: psubd {{\.LCPI.*}}, %xmm1
|
||||
; X32-NEXT: movdqa %xmm1, %xmm0
|
||||
; X32-NEXT: psubd %xmm1, %xmm0
|
||||
; X32-NEXT: paddd {{\.LCPI.*}}, %xmm0
|
||||
; X32-NEXT: psubd %xmm0, %xmm2
|
||||
; X32-NEXT: movdqa %xmm2, %xmm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: vec_sink_add_of_const_to_sub2:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: psubd %xmm0, %xmm1
|
||||
; X64-NEXT: paddd %xmm2, %xmm1
|
||||
; X64-NEXT: psubd {{.*}}(%rip), %xmm1
|
||||
; X64-NEXT: movdqa %xmm1, %xmm0
|
||||
; X64-NEXT: psubd %xmm1, %xmm0
|
||||
; X64-NEXT: paddd {{.*}}(%rip), %xmm0
|
||||
; X64-NEXT: psubd %xmm0, %xmm2
|
||||
; X64-NEXT: movdqa %xmm2, %xmm0
|
||||
; X64-NEXT: retq
|
||||
%t0 = sub <4 x i32> %a, %b
|
||||
%t1 = add <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46> ; constant always on RHS
|
||||
@ -460,15 +460,15 @@ define <4 x i32> @vec_sink_sub_of_const_to_sub(<4 x i32> %a, <4 x i32> %b, <4 x
|
||||
; X32-LABEL: vec_sink_sub_of_const_to_sub:
|
||||
; X32: # %bb.0:
|
||||
; X32-NEXT: psubd %xmm1, %xmm0
|
||||
; X32-NEXT: psubd %xmm2, %xmm0
|
||||
; X32-NEXT: psubd {{\.LCPI.*}}, %xmm0
|
||||
; X32-NEXT: psubd %xmm2, %xmm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: vec_sink_sub_of_const_to_sub:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: psubd %xmm1, %xmm0
|
||||
; X64-NEXT: psubd %xmm2, %xmm0
|
||||
; X64-NEXT: psubd {{.*}}(%rip), %xmm0
|
||||
; X64-NEXT: psubd %xmm2, %xmm0
|
||||
; X64-NEXT: retq
|
||||
%t0 = sub <4 x i32> %a, %b
|
||||
%t1 = sub <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46>
|
||||
@ -504,16 +504,16 @@ define <4 x i32> @vec_sink_sub_from_const_to_sub(<4 x i32> %a, <4 x i32> %b, <4
|
||||
; X32-LABEL: vec_sink_sub_from_const_to_sub:
|
||||
; X32: # %bb.0:
|
||||
; X32-NEXT: psubd %xmm0, %xmm1
|
||||
; X32-NEXT: psubd %xmm2, %xmm1
|
||||
; X32-NEXT: paddd {{\.LCPI.*}}, %xmm1
|
||||
; X32-NEXT: psubd %xmm2, %xmm1
|
||||
; X32-NEXT: movdqa %xmm1, %xmm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: vec_sink_sub_from_const_to_sub:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: psubd %xmm0, %xmm1
|
||||
; X64-NEXT: psubd %xmm2, %xmm1
|
||||
; X64-NEXT: paddd {{.*}}(%rip), %xmm1
|
||||
; X64-NEXT: psubd %xmm2, %xmm1
|
||||
; X64-NEXT: movdqa %xmm1, %xmm0
|
||||
; X64-NEXT: retq
|
||||
%t0 = sub <4 x i32> %a, %b
|
||||
@ -525,15 +525,15 @@ define <4 x i32> @vec_sink_sub_from_const_to_sub2(<4 x i32> %a, <4 x i32> %b, <4
|
||||
; X32-LABEL: vec_sink_sub_from_const_to_sub2:
|
||||
; X32: # %bb.0:
|
||||
; X32-NEXT: psubd %xmm1, %xmm0
|
||||
; X32-NEXT: paddd %xmm2, %xmm0
|
||||
; X32-NEXT: psubd {{\.LCPI.*}}, %xmm0
|
||||
; X32-NEXT: paddd %xmm2, %xmm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: vec_sink_sub_from_const_to_sub2:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: psubd %xmm1, %xmm0
|
||||
; X64-NEXT: paddd %xmm2, %xmm0
|
||||
; X64-NEXT: psubd {{.*}}(%rip), %xmm0
|
||||
; X64-NEXT: paddd %xmm2, %xmm0
|
||||
; X64-NEXT: retq
|
||||
%t0 = sub <4 x i32> %a, %b
|
||||
%t1 = sub <4 x i32> <i32 42, i32 24, i32 undef, i32 46>, %t0
|
||||
|
@ -186,10 +186,10 @@ define <16 x i8> @test_div7_16i8(<16 x i8> %a) nounwind {
|
||||
; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
|
||||
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
|
||||
; SSE2-NEXT: pxor %xmm2, %xmm0
|
||||
; SSE2-NEXT: psubb %xmm2, %xmm0
|
||||
; SSE2-NEXT: psrlw $7, %xmm1
|
||||
; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
|
||||
; SSE2-NEXT: paddb %xmm0, %xmm1
|
||||
; SSE2-NEXT: psubb %xmm2, %xmm1
|
||||
; SSE2-NEXT: movdqa %xmm1, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
@ -210,10 +210,10 @@ define <16 x i8> @test_div7_16i8(<16 x i8> %a) nounwind {
|
||||
; SSE41-NEXT: pand {{.*}}(%rip), %xmm0
|
||||
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
|
||||
; SSE41-NEXT: pxor %xmm2, %xmm0
|
||||
; SSE41-NEXT: psubb %xmm2, %xmm0
|
||||
; SSE41-NEXT: psrlw $7, %xmm1
|
||||
; SSE41-NEXT: pand {{.*}}(%rip), %xmm1
|
||||
; SSE41-NEXT: paddb %xmm0, %xmm1
|
||||
; SSE41-NEXT: psubb %xmm2, %xmm1
|
||||
; SSE41-NEXT: movdqa %xmm1, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
@ -233,10 +233,10 @@ define <16 x i8> @test_div7_16i8(<16 x i8> %a) nounwind {
|
||||
; AVX1-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
|
||||
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
|
||||
; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpsubb %xmm2, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpsrlw $7, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0
|
||||
; AVX1-NEXT: vpsubb %xmm2, %xmm0, %xmm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2NOBW-LABEL: test_div7_16i8:
|
||||
@ -251,10 +251,10 @@ define <16 x i8> @test_div7_16i8(<16 x i8> %a) nounwind {
|
||||
; AVX2NOBW-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
|
||||
; AVX2NOBW-NEXT: vmovdqa {{.*#+}} xmm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
|
||||
; AVX2NOBW-NEXT: vpxor %xmm2, %xmm1, %xmm1
|
||||
; AVX2NOBW-NEXT: vpsubb %xmm2, %xmm1, %xmm1
|
||||
; AVX2NOBW-NEXT: vpsrlw $7, %xmm0, %xmm0
|
||||
; AVX2NOBW-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX2NOBW-NEXT: vpaddb %xmm0, %xmm1, %xmm0
|
||||
; AVX2NOBW-NEXT: vpsubb %xmm2, %xmm0, %xmm0
|
||||
; AVX2NOBW-NEXT: vzeroupper
|
||||
; AVX2NOBW-NEXT: retq
|
||||
;
|
||||
@ -269,10 +269,10 @@ define <16 x i8> @test_div7_16i8(<16 x i8> %a) nounwind {
|
||||
; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
|
||||
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
|
||||
; AVX512BW-NEXT: vpxor %xmm2, %xmm1, %xmm1
|
||||
; AVX512BW-NEXT: vpsubb %xmm2, %xmm1, %xmm1
|
||||
; AVX512BW-NEXT: vpsrlw $7, %xmm0, %xmm0
|
||||
; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512BW-NEXT: vpaddb %xmm0, %xmm1, %xmm0
|
||||
; AVX512BW-NEXT: vpsubb %xmm2, %xmm0, %xmm0
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
; AVX512BW-NEXT: retq
|
||||
%res = sdiv <16 x i8> %a, <i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7>
|
||||
@ -657,10 +657,10 @@ define <16 x i8> @test_rem7_16i8(<16 x i8> %a) nounwind {
|
||||
; SSE2-NEXT: pand {{.*}}(%rip), %xmm2
|
||||
; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
|
||||
; SSE2-NEXT: pxor %xmm3, %xmm2
|
||||
; SSE2-NEXT: psubb %xmm3, %xmm2
|
||||
; SSE2-NEXT: psrlw $7, %xmm1
|
||||
; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
|
||||
; SSE2-NEXT: paddb %xmm2, %xmm1
|
||||
; SSE2-NEXT: psubb %xmm3, %xmm1
|
||||
; SSE2-NEXT: movdqa %xmm1, %xmm2
|
||||
; SSE2-NEXT: psllw $3, %xmm2
|
||||
; SSE2-NEXT: pand {{.*}}(%rip), %xmm2
|
||||
@ -685,10 +685,10 @@ define <16 x i8> @test_rem7_16i8(<16 x i8> %a) nounwind {
|
||||
; SSE41-NEXT: pand {{.*}}(%rip), %xmm2
|
||||
; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
|
||||
; SSE41-NEXT: pxor %xmm3, %xmm2
|
||||
; SSE41-NEXT: psubb %xmm3, %xmm2
|
||||
; SSE41-NEXT: psrlw $7, %xmm1
|
||||
; SSE41-NEXT: pand {{.*}}(%rip), %xmm1
|
||||
; SSE41-NEXT: paddb %xmm2, %xmm1
|
||||
; SSE41-NEXT: psubb %xmm3, %xmm1
|
||||
; SSE41-NEXT: movdqa %xmm1, %xmm2
|
||||
; SSE41-NEXT: psllw $3, %xmm2
|
||||
; SSE41-NEXT: pand {{.*}}(%rip), %xmm2
|
||||
@ -712,10 +712,10 @@ define <16 x i8> @test_rem7_16i8(<16 x i8> %a) nounwind {
|
||||
; AVX1-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
|
||||
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
|
||||
; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpsubb %xmm3, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpsrlw $7, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpaddb %xmm1, %xmm2, %xmm1
|
||||
; AVX1-NEXT: vpsubb %xmm3, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpsllw $3, %xmm1, %xmm2
|
||||
; AVX1-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpsubb %xmm2, %xmm1, %xmm1
|
||||
@ -734,10 +734,10 @@ define <16 x i8> @test_rem7_16i8(<16 x i8> %a) nounwind {
|
||||
; AVX2NOBW-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
|
||||
; AVX2NOBW-NEXT: vmovdqa {{.*#+}} xmm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
|
||||
; AVX2NOBW-NEXT: vpxor %xmm3, %xmm2, %xmm2
|
||||
; AVX2NOBW-NEXT: vpsubb %xmm3, %xmm2, %xmm2
|
||||
; AVX2NOBW-NEXT: vpsrlw $7, %xmm1, %xmm1
|
||||
; AVX2NOBW-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
|
||||
; AVX2NOBW-NEXT: vpaddb %xmm1, %xmm2, %xmm1
|
||||
; AVX2NOBW-NEXT: vpsubb %xmm3, %xmm1, %xmm1
|
||||
; AVX2NOBW-NEXT: vpsllw $3, %xmm1, %xmm2
|
||||
; AVX2NOBW-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
|
||||
; AVX2NOBW-NEXT: vpsubb %xmm2, %xmm1, %xmm1
|
||||
@ -756,10 +756,10 @@ define <16 x i8> @test_rem7_16i8(<16 x i8> %a) nounwind {
|
||||
; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
|
||||
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
|
||||
; AVX512BW-NEXT: vpxor %xmm3, %xmm2, %xmm2
|
||||
; AVX512BW-NEXT: vpsubb %xmm3, %xmm2, %xmm2
|
||||
; AVX512BW-NEXT: vpsrlw $7, %xmm1, %xmm1
|
||||
; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
|
||||
; AVX512BW-NEXT: vpaddb %xmm1, %xmm2, %xmm1
|
||||
; AVX512BW-NEXT: vpsubb %xmm3, %xmm1, %xmm1
|
||||
; AVX512BW-NEXT: vpsllw $3, %xmm1, %xmm2
|
||||
; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
|
||||
; AVX512BW-NEXT: vpsubb %xmm2, %xmm1, %xmm1
|
||||
|
@ -177,8 +177,8 @@ define <32 x i8> @test_div7_32i8(<32 x i8> %a) nounwind {
|
||||
; AVX1-NEXT: vpand %xmm5, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
|
||||
; AVX1-NEXT: vpxor %xmm6, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpaddb %xmm2, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpsubb %xmm6, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpaddb %xmm2, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpmovsxbw %xmm0, %xmm2
|
||||
; AVX1-NEXT: vpmullw %xmm3, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
|
||||
@ -193,8 +193,8 @@ define <32 x i8> @test_div7_32i8(<32 x i8> %a) nounwind {
|
||||
; AVX1-NEXT: vpsrlw $2, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpand %xmm5, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpxor %xmm6, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpsubb %xmm6, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
@ -215,10 +215,10 @@ define <32 x i8> @test_div7_32i8(<32 x i8> %a) nounwind {
|
||||
; AVX2NOBW-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
|
||||
; AVX2NOBW-NEXT: vmovdqa {{.*#+}} ymm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
|
||||
; AVX2NOBW-NEXT: vpxor %ymm2, %ymm1, %ymm1
|
||||
; AVX2NOBW-NEXT: vpsubb %ymm2, %ymm1, %ymm1
|
||||
; AVX2NOBW-NEXT: vpsrlw $7, %ymm0, %ymm0
|
||||
; AVX2NOBW-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX2NOBW-NEXT: vpaddb %ymm0, %ymm1, %ymm0
|
||||
; AVX2NOBW-NEXT: vpsubb %ymm2, %ymm0, %ymm0
|
||||
; AVX2NOBW-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: test_div7_32i8:
|
||||
@ -232,10 +232,10 @@ define <32 x i8> @test_div7_32i8(<32 x i8> %a) nounwind {
|
||||
; AVX512BW-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
|
||||
; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
|
||||
; AVX512BW-NEXT: vpxor %ymm2, %ymm1, %ymm1
|
||||
; AVX512BW-NEXT: vpsubb %ymm2, %ymm1, %ymm1
|
||||
; AVX512BW-NEXT: vpsrlw $7, %ymm0, %ymm0
|
||||
; AVX512BW-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX512BW-NEXT: vpaddb %ymm0, %ymm1, %ymm0
|
||||
; AVX512BW-NEXT: vpsubb %ymm2, %ymm0, %ymm0
|
||||
; AVX512BW-NEXT: retq
|
||||
%res = sdiv <32 x i8> %a, <i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7>
|
||||
ret <32 x i8> %res
|
||||
@ -588,8 +588,8 @@ define <32 x i8> @test_rem7_32i8(<32 x i8> %a) nounwind {
|
||||
; AVX1-NEXT: vpand %xmm6, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vmovdqa {{.*#+}} xmm7 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
|
||||
; AVX1-NEXT: vpxor %xmm7, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpaddb %xmm4, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpsubb %xmm7, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpaddb %xmm4, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpsllw $3, %xmm2, %xmm4
|
||||
; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248]
|
||||
; AVX1-NEXT: vpand %xmm5, %xmm4, %xmm4
|
||||
@ -609,8 +609,8 @@ define <32 x i8> @test_rem7_32i8(<32 x i8> %a) nounwind {
|
||||
; AVX1-NEXT: vpsrlw $2, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpand %xmm6, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpxor %xmm7, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpaddb %xmm3, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpsubb %xmm7, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpaddb %xmm3, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpsllw $3, %xmm2, %xmm3
|
||||
; AVX1-NEXT: vpand %xmm5, %xmm3, %xmm3
|
||||
; AVX1-NEXT: vpsubb %xmm3, %xmm2, %xmm2
|
||||
@ -635,10 +635,10 @@ define <32 x i8> @test_rem7_32i8(<32 x i8> %a) nounwind {
|
||||
; AVX2NOBW-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
|
||||
; AVX2NOBW-NEXT: vmovdqa {{.*#+}} ymm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
|
||||
; AVX2NOBW-NEXT: vpxor %ymm3, %ymm2, %ymm2
|
||||
; AVX2NOBW-NEXT: vpsubb %ymm3, %ymm2, %ymm2
|
||||
; AVX2NOBW-NEXT: vpsrlw $7, %ymm1, %ymm1
|
||||
; AVX2NOBW-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
|
||||
; AVX2NOBW-NEXT: vpaddb %ymm1, %ymm2, %ymm1
|
||||
; AVX2NOBW-NEXT: vpsubb %ymm3, %ymm1, %ymm1
|
||||
; AVX2NOBW-NEXT: vpsllw $3, %ymm1, %ymm2
|
||||
; AVX2NOBW-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
|
||||
; AVX2NOBW-NEXT: vpsubb %ymm2, %ymm1, %ymm1
|
||||
@ -656,10 +656,10 @@ define <32 x i8> @test_rem7_32i8(<32 x i8> %a) nounwind {
|
||||
; AVX512BW-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
|
||||
; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
|
||||
; AVX512BW-NEXT: vpxor %ymm3, %ymm2, %ymm2
|
||||
; AVX512BW-NEXT: vpsubb %ymm3, %ymm2, %ymm2
|
||||
; AVX512BW-NEXT: vpsrlw $7, %ymm1, %ymm1
|
||||
; AVX512BW-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
|
||||
; AVX512BW-NEXT: vpaddb %ymm1, %ymm2, %ymm1
|
||||
; AVX512BW-NEXT: vpsubb %ymm3, %ymm1, %ymm1
|
||||
; AVX512BW-NEXT: vpsllw $3, %ymm1, %ymm2
|
||||
; AVX512BW-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
|
||||
; AVX512BW-NEXT: vpsubb %ymm2, %ymm1, %ymm1
|
||||
|
@ -146,8 +146,8 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind {
|
||||
; AVX512F-NEXT: vpand %ymm5, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm6 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
|
||||
; AVX512F-NEXT: vpxor %ymm6, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpaddb %ymm2, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpsubb %ymm6, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpaddb %ymm2, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
|
||||
; AVX512F-NEXT: vpmovsxbw %xmm2, %ymm2
|
||||
; AVX512F-NEXT: vpmullw %ymm3, %ymm2, %ymm2
|
||||
@ -163,8 +163,8 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind {
|
||||
; AVX512F-NEXT: vpsrlw $2, %ymm1, %ymm1
|
||||
; AVX512F-NEXT: vpand %ymm5, %ymm1, %ymm1
|
||||
; AVX512F-NEXT: vpxor %ymm6, %ymm1, %ymm1
|
||||
; AVX512F-NEXT: vpaddb %ymm2, %ymm1, %ymm1
|
||||
; AVX512F-NEXT: vpsubb %ymm6, %ymm1, %ymm1
|
||||
; AVX512F-NEXT: vpaddb %ymm2, %ymm1, %ymm1
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: test_div7_64i8:
|
||||
@ -185,10 +185,10 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind {
|
||||
; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
|
||||
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
|
||||
; AVX512BW-NEXT: vpxorq %zmm2, %zmm1, %zmm1
|
||||
; AVX512BW-NEXT: vpsubb %zmm2, %zmm1, %zmm1
|
||||
; AVX512BW-NEXT: vpsrlw $7, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0
|
||||
; AVX512BW-NEXT: vpsubb %zmm2, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
%res = sdiv <64 x i8> %a, <i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7>
|
||||
ret <64 x i8> %res
|
||||
@ -486,8 +486,8 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
|
||||
; AVX512F-NEXT: vpand %ymm6, %ymm2, %ymm2
|
||||
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm7 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
|
||||
; AVX512F-NEXT: vpxor %ymm7, %ymm2, %ymm2
|
||||
; AVX512F-NEXT: vpaddb %ymm4, %ymm2, %ymm2
|
||||
; AVX512F-NEXT: vpsubb %ymm7, %ymm2, %ymm2
|
||||
; AVX512F-NEXT: vpaddb %ymm4, %ymm2, %ymm2
|
||||
; AVX512F-NEXT: vpsllw $3, %ymm2, %ymm4
|
||||
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm8 = [248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248]
|
||||
; AVX512F-NEXT: vpand %ymm8, %ymm4, %ymm4
|
||||
@ -508,8 +508,8 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
|
||||
; AVX512F-NEXT: vpsrlw $2, %ymm2, %ymm2
|
||||
; AVX512F-NEXT: vpand %ymm6, %ymm2, %ymm2
|
||||
; AVX512F-NEXT: vpxor %ymm7, %ymm2, %ymm2
|
||||
; AVX512F-NEXT: vpaddb %ymm3, %ymm2, %ymm2
|
||||
; AVX512F-NEXT: vpsubb %ymm7, %ymm2, %ymm2
|
||||
; AVX512F-NEXT: vpaddb %ymm3, %ymm2, %ymm2
|
||||
; AVX512F-NEXT: vpsllw $3, %ymm2, %ymm3
|
||||
; AVX512F-NEXT: vpand %ymm8, %ymm3, %ymm3
|
||||
; AVX512F-NEXT: vpsubb %ymm3, %ymm2, %ymm2
|
||||
@ -534,10 +534,10 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
|
||||
; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2
|
||||
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
|
||||
; AVX512BW-NEXT: vpxorq %zmm3, %zmm2, %zmm2
|
||||
; AVX512BW-NEXT: vpsubb %zmm3, %zmm2, %zmm2
|
||||
; AVX512BW-NEXT: vpsrlw $7, %zmm1, %zmm1
|
||||
; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
|
||||
; AVX512BW-NEXT: vpaddb %zmm1, %zmm2, %zmm1
|
||||
; AVX512BW-NEXT: vpsubb %zmm3, %zmm1, %zmm1
|
||||
; AVX512BW-NEXT: vpsllw $3, %zmm1, %zmm2
|
||||
; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2
|
||||
; AVX512BW-NEXT: vpsubb %zmm2, %zmm1, %zmm1
|
||||
|
@ -532,24 +532,22 @@ define i32 @add_of_not(i32 %x, i32 %y) {
|
||||
; X32-LABEL: add_of_not:
|
||||
; X32: # %bb.0:
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: notl %eax
|
||||
; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: subl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: decl %eax
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LIN-LABEL: add_of_not:
|
||||
; X64-LIN: # %bb.0:
|
||||
; X64-LIN-NEXT: # kill: def $esi killed $esi def $rsi
|
||||
; X64-LIN-NEXT: # kill: def $edi killed $edi def $rdi
|
||||
; X64-LIN-NEXT: notl %esi
|
||||
; X64-LIN-NEXT: leal (%rsi,%rdi), %eax
|
||||
; X64-LIN-NEXT: subl %esi, %edi
|
||||
; X64-LIN-NEXT: leal -1(%rdi), %eax
|
||||
; X64-LIN-NEXT: retq
|
||||
;
|
||||
; X64-WIN-LABEL: add_of_not:
|
||||
; X64-WIN: # %bb.0:
|
||||
; X64-WIN-NEXT: # kill: def $edx killed $edx def $rdx
|
||||
; X64-WIN-NEXT: # kill: def $ecx killed $ecx def $rcx
|
||||
; X64-WIN-NEXT: notl %edx
|
||||
; X64-WIN-NEXT: leal (%rdx,%rcx), %eax
|
||||
; X64-WIN-NEXT: subl %edx, %ecx
|
||||
; X64-WIN-NEXT: leal -1(%rcx), %eax
|
||||
; X64-WIN-NEXT: retq
|
||||
%t0 = sub i32 %x, %y
|
||||
%r = add i32 %t0, -1
|
||||
@ -560,24 +558,22 @@ define i32 @add_of_not_decrement(i32 %x, i32 %y) {
|
||||
; X32-LABEL: add_of_not_decrement:
|
||||
; X32: # %bb.0:
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: notl %eax
|
||||
; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: subl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: decl %eax
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LIN-LABEL: add_of_not_decrement:
|
||||
; X64-LIN: # %bb.0:
|
||||
; X64-LIN-NEXT: # kill: def $esi killed $esi def $rsi
|
||||
; X64-LIN-NEXT: # kill: def $edi killed $edi def $rdi
|
||||
; X64-LIN-NEXT: notl %esi
|
||||
; X64-LIN-NEXT: leal (%rsi,%rdi), %eax
|
||||
; X64-LIN-NEXT: subl %esi, %edi
|
||||
; X64-LIN-NEXT: leal -1(%rdi), %eax
|
||||
; X64-LIN-NEXT: retq
|
||||
;
|
||||
; X64-WIN-LABEL: add_of_not_decrement:
|
||||
; X64-WIN: # %bb.0:
|
||||
; X64-WIN-NEXT: # kill: def $edx killed $edx def $rdx
|
||||
; X64-WIN-NEXT: # kill: def $ecx killed $ecx def $rcx
|
||||
; X64-WIN-NEXT: notl %edx
|
||||
; X64-WIN-NEXT: leal (%rdx,%rcx), %eax
|
||||
; X64-WIN-NEXT: subl %edx, %ecx
|
||||
; X64-WIN-NEXT: leal -1(%rcx), %eax
|
||||
; X64-WIN-NEXT: retq
|
||||
%t0 = sub i32 %x, %y
|
||||
%r = sub i32 %t0, 1
|
||||
@ -587,23 +583,24 @@ define i32 @add_of_not_decrement(i32 %x, i32 %y) {
|
||||
define <4 x i32> @vec_add_of_not(<4 x i32> %x, <4 x i32> %y) {
|
||||
; X32-LABEL: vec_add_of_not:
|
||||
; X32: # %bb.0:
|
||||
; X32-NEXT: pcmpeqd %xmm2, %xmm2
|
||||
; X32-NEXT: pxor %xmm1, %xmm2
|
||||
; X32-NEXT: paddd %xmm2, %xmm0
|
||||
; X32-NEXT: psubd %xmm1, %xmm0
|
||||
; X32-NEXT: pcmpeqd %xmm1, %xmm1
|
||||
; X32-NEXT: paddd %xmm1, %xmm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LIN-LABEL: vec_add_of_not:
|
||||
; X64-LIN: # %bb.0:
|
||||
; X64-LIN-NEXT: pcmpeqd %xmm2, %xmm2
|
||||
; X64-LIN-NEXT: pxor %xmm1, %xmm2
|
||||
; X64-LIN-NEXT: paddd %xmm2, %xmm0
|
||||
; X64-LIN-NEXT: psubd %xmm1, %xmm0
|
||||
; X64-LIN-NEXT: pcmpeqd %xmm1, %xmm1
|
||||
; X64-LIN-NEXT: paddd %xmm1, %xmm0
|
||||
; X64-LIN-NEXT: retq
|
||||
;
|
||||
; X64-WIN-LABEL: vec_add_of_not:
|
||||
; X64-WIN: # %bb.0:
|
||||
; X64-WIN-NEXT: movdqa (%rcx), %xmm1
|
||||
; X64-WIN-NEXT: psubd (%rdx), %xmm1
|
||||
; X64-WIN-NEXT: pcmpeqd %xmm0, %xmm0
|
||||
; X64-WIN-NEXT: pxor (%rdx), %xmm0
|
||||
; X64-WIN-NEXT: paddd (%rcx), %xmm0
|
||||
; X64-WIN-NEXT: paddd %xmm1, %xmm0
|
||||
; X64-WIN-NEXT: retq
|
||||
%t0 = sub <4 x i32> %x, %y
|
||||
%r = add <4 x i32> %t0, <i32 -1, i32 -1, i32 -1, i32 -1>
|
||||
@ -613,23 +610,24 @@ define <4 x i32> @vec_add_of_not(<4 x i32> %x, <4 x i32> %y) {
|
||||
define <4 x i32> @vec_add_of_not_decrement(<4 x i32> %x, <4 x i32> %y) {
|
||||
; X32-LABEL: vec_add_of_not_decrement:
|
||||
; X32: # %bb.0:
|
||||
; X32-NEXT: pcmpeqd %xmm2, %xmm2
|
||||
; X32-NEXT: pxor %xmm1, %xmm2
|
||||
; X32-NEXT: paddd %xmm2, %xmm0
|
||||
; X32-NEXT: psubd %xmm1, %xmm0
|
||||
; X32-NEXT: pcmpeqd %xmm1, %xmm1
|
||||
; X32-NEXT: paddd %xmm1, %xmm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LIN-LABEL: vec_add_of_not_decrement:
|
||||
; X64-LIN: # %bb.0:
|
||||
; X64-LIN-NEXT: pcmpeqd %xmm2, %xmm2
|
||||
; X64-LIN-NEXT: pxor %xmm1, %xmm2
|
||||
; X64-LIN-NEXT: paddd %xmm2, %xmm0
|
||||
; X64-LIN-NEXT: psubd %xmm1, %xmm0
|
||||
; X64-LIN-NEXT: pcmpeqd %xmm1, %xmm1
|
||||
; X64-LIN-NEXT: paddd %xmm1, %xmm0
|
||||
; X64-LIN-NEXT: retq
|
||||
;
|
||||
; X64-WIN-LABEL: vec_add_of_not_decrement:
|
||||
; X64-WIN: # %bb.0:
|
||||
; X64-WIN-NEXT: movdqa (%rcx), %xmm1
|
||||
; X64-WIN-NEXT: psubd (%rdx), %xmm1
|
||||
; X64-WIN-NEXT: pcmpeqd %xmm0, %xmm0
|
||||
; X64-WIN-NEXT: pxor (%rdx), %xmm0
|
||||
; X64-WIN-NEXT: paddd (%rcx), %xmm0
|
||||
; X64-WIN-NEXT: paddd %xmm1, %xmm0
|
||||
; X64-WIN-NEXT: retq
|
||||
%t0 = sub <4 x i32> %x, %y
|
||||
%r = sub <4 x i32> %t0, <i32 1, i32 1, i32 1, i32 1>
|
||||
|
@ -15,27 +15,30 @@ define void @func([40 x i16]* %a, i32* %b, i16** %c, i64* %d) nounwind {
|
||||
; CHECK-NEXT: subq %rax, %rsi
|
||||
; CHECK-NEXT: movq (%rdx), %rax
|
||||
; CHECK-NEXT: movswl 8(%rdi), %edx
|
||||
; CHECK-NEXT: movabsq $5089792277106559579, %rdi # imm = 0x46A2931BF1768A5B
|
||||
; CHECK-NEXT: movswl (%rax,%rsi,2), %eax
|
||||
; CHECK-NEXT: movl $1, %esi
|
||||
; CHECK-NEXT: imull %edx, %eax
|
||||
; CHECK-NEXT: xorl %edx, %edx
|
||||
; CHECK-NEXT: addl $2138875574, %eax # imm = 0x7F7CA6B6
|
||||
; CHECK-NEXT: cmpl $-8608074, %eax # imm = 0xFF7CA6B6
|
||||
; CHECK-NEXT: movslq %eax, %rdi
|
||||
; CHECK-NEXT: movslq %eax, %r8
|
||||
; CHECK-NEXT: setl %dl
|
||||
; CHECK-NEXT: cmpl $2138875573, %eax # imm = 0x7F7CA6B5
|
||||
; CHECK-NEXT: movq %rdi, %r8
|
||||
; CHECK-NEXT: movq %r8, %r9
|
||||
; CHECK-NEXT: leal -1(%rdx,%rdx), %edx
|
||||
; CHECK-NEXT: cmovlel %edx, %esi
|
||||
; CHECK-NEXT: subq %rax, %r8
|
||||
; CHECK-NEXT: subq %rax, %r9
|
||||
; CHECK-NEXT: addq %r8, %rdi
|
||||
; CHECK-NEXT: xorl %eax, %eax
|
||||
; CHECK-NEXT: cmpl $1, %esi
|
||||
; CHECK-NEXT: cmovneq %rax, %r8
|
||||
; CHECK-NEXT: testl %edi, %edi
|
||||
; CHECK-NEXT: cmovnsq %rax, %r8
|
||||
; CHECK-NEXT: movq (%rcx), %rax
|
||||
; CHECK-NEXT: subq %r8, %rdi
|
||||
; CHECK-NEXT: leaq -2138875574(%rax,%rdi), %rax
|
||||
; CHECK-NEXT: cmovneq %rax, %r9
|
||||
; CHECK-NEXT: testl %r8d, %r8d
|
||||
; CHECK-NEXT: cmovnsq %rax, %r9
|
||||
; CHECK-NEXT: movabsq $-5089792279245435153, %rax # imm = 0xB95D6CE38F0CCEEF
|
||||
; CHECK-NEXT: subq %r9, %rdi
|
||||
; CHECK-NEXT: addq (%rcx), %rdi
|
||||
; CHECK-NEXT: addq %rdi, %rax
|
||||
; CHECK-NEXT: movq %rax, (%rcx)
|
||||
; CHECK-NEXT: retq
|
||||
entry:
|
||||
|
Loading…
x
Reference in New Issue
Block a user