1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 12:41:49 +01:00

[SystemZ] Improve codegen for certain SADDO-immediate cases

When performing an add-with-overflow with an immediate in the
range -2G ... -4G, code currently loads the immediate into a
register, which generally takes two instructions.

In this particular case, it is preferable to load the negated
immediate into a register instead, which always requires only
one instruction, and then perform a subtract.

llvm-svn: 357597
This commit is contained in:
Ulrich Weigand 2019-04-03 15:09:19 +00:00
parent 60c8d3580e
commit 47cfb1bca2
3 changed files with 87 additions and 8 deletions

View File

@ -1029,6 +1029,14 @@ let AddedComplexity = 1 in {
(AGFI GR64:$src1, imm64sx32n:$src2)>;
}
// And vice versa in one special case, where we need to load a
// constant into a register in any case, but the negated constant
// requires fewer instructions to load.
//
// Both patterns select an add-with-overflow of an immediate as a register
// subtract (SGR) of the negated immediate.  The operand transforms attached
// to imm64lh16n / imm64lf32n perform the negation, so the value handed to
// LLILH is bits 16-31 of the negated constant and the value handed to LLILF
// is its low 32 bits.
def : Pat<(z_saddo GR64:$src1, imm64lh16n:$src2),
(SGR GR64:$src1, (LLILH imm64lh16n:$src2))>;
def : Pat<(z_saddo GR64:$src1, imm64lf32n:$src2),
(SGR GR64:$src1, (LLILF imm64lf32n:$src2))>;
// Subtraction producing a carry.
let Defs = [CC] in {
// Subtraction of a register.

View File

@ -188,6 +188,17 @@ def HF32 : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i64);
}]>;
// Negated variants.
// Negate the immediate (modulo 2^64) and return bits 16-31 of the result,
// moved down into the low 16 bits, as an i64 target constant.
def NEGLH16 : SDNodeXForm<imm, [{
  uint64_t Negated = -N->getZExtValue();
  uint64_t Halfword = (Negated >> 16) & 0x000000000000FFFFULL;
  return CurDAG->getTargetConstant(Halfword, SDLoc(N), MVT::i64);
}]>;
// Negate the immediate (modulo 2^64) and return the low 32 bits of the
// result as an i64 target constant.
def NEGLF32 : SDNodeXForm<imm, [{
  const uint64_t Low32Mask = 0x00000000FFFFFFFFULL;
  uint64_t Negated = -N->getZExtValue();
  return CurDAG->getTargetConstant(Negated & Low32Mask, SDLoc(N), MVT::i64);
}]>;
// Truncate an immediate to a 8-bit signed quantity.
def SIMM8 : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(int8_t(N->getZExtValue()), SDLoc(N),
@ -430,6 +441,15 @@ def imm64hf32c : Immediate<i64, [{
return SystemZ::isImmHF(uint64_t(~N->getZExtValue()));
}], HF32, "U32Imm">;
// Negated immediates that fit LF32 or LH16.
// Matches a 64-bit immediate whose negation (modulo 2^64) is accepted by
// SystemZ::isImmLH; the operand is rewritten with NEGLH16.
def imm64lh16n : Immediate<i64, [{
  uint64_t Negated = -N->getZExtValue();
  return SystemZ::isImmLH(Negated);
}], NEGLH16, "U16Imm">;
// Matches a 64-bit immediate whose negation (modulo 2^64) is accepted by
// SystemZ::isImmLF; the operand is rewritten with NEGLF32.
def imm64lf32n : Immediate<i64, [{
  uint64_t Negated = -N->getZExtValue();
  return SystemZ::isImmLF(Negated);
}], NEGLF32, "U32Imm">;
// Short immediates.
def imm64sx8 : Immediate<i64, [{
return isInt<8>(N->getSExtValue());

View File

@ -151,12 +151,12 @@ define zeroext i1 @f9(i64 %dummy, i64 %a, i64 *%res) {
ret i1 %obit
}
; Check the next value down, which must use register addition instead.
; Check the next value down, which can use register subtraction instead.
define zeroext i1 @f10(i64 %dummy, i64 %a, i64 *%res) {
; CHECK-LABEL: f10:
; CHECK: llihf [[REG1:%r[0-9]+]], 4294967295
; CHECK: agr [[REG1]], %r3
; CHECK-DAG: stg [[REG1]], 0(%r4)
; CHECK: llilf [[REG1:%r[0-9]+]], 2147483649
; CHECK: sgr %r3, [[REG1]]
; CHECK-DAG: stg %r3, 0(%r4)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], 1342177280
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
@ -168,9 +168,60 @@ define zeroext i1 @f10(i64 %dummy, i64 %a, i64 *%res) {
ret i1 %obit
}
; Check using the overflow result for a branch.
define void @f11(i64 %dummy, i64 %a, i64 *%res) {
; We may be able to use LLILH instead of LLILF.
; The immediate is -2147549184 = -(32769 << 16): its negation has no bits
; set below bit 16, so the expected load is llilh with operand 32769,
; followed by a register subtract from %a.
define zeroext i1 @f11(i64 %dummy, i64 %a, i64 *%res) {
; CHECK-LABEL: f11:
; CHECK: llilh [[REG1:%r[0-9]+]], 32769
; CHECK: sgr %r3, [[REG1]]
; CHECK-DAG: stg %r3, 0(%r4)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], 1342177280
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
%t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 -2147549184)
%val = extractvalue {i64, i1} %t, 0
%obit = extractvalue {i64, i1} %t, 1
store i64 %val, i64 *%res
ret i1 %obit
}
; Check low end of the LLILF/SGR range.
; The immediate is -4294967295 = -(2^32 - 1); its negation, 4294967295, is
; the largest value that still fits in 32 bits, so this is the most negative
; immediate for which a single llilf plus a register subtract is expected.
define zeroext i1 @f12(i64 %dummy, i64 %a, i64 *%res) {
; CHECK-LABEL: f12:
; CHECK: llilf [[REG1:%r[0-9]+]], 4294967295
; CHECK: sgr %r3, [[REG1]]
; CHECK-DAG: stg %r3, 0(%r4)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], 1342177280
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
%t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 -4294967295)
%val = extractvalue {i64, i1} %t, 0
%obit = extractvalue {i64, i1} %t, 1
store i64 %val, i64 *%res
ret i1 %obit
}
; Check the next value down, which must use register addition instead.
; The immediate is -4294967296 = -(2^32); its negation no longer fits in
; 32 bits, so the negated-immediate subtract form does not apply.  The
; expected output instead loads a constant with a single llihf and adds
; %a to it with agr, storing the result from the loaded register.
define zeroext i1 @f13(i64 %dummy, i64 %a, i64 *%res) {
; CHECK-LABEL: f13:
; CHECK: llihf [[REG1:%r[0-9]+]], 4294967295
; CHECK: agr [[REG1]], %r3
; CHECK-DAG: stg [[REG1]], 0(%r4)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], 1342177280
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
%t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 -4294967296)
%val = extractvalue {i64, i1} %t, 0
%obit = extractvalue {i64, i1} %t, 1
store i64 %val, i64 *%res
ret i1 %obit
}
; Check using the overflow result for a branch.
define void @f14(i64 %dummy, i64 %a, i64 *%res) {
; CHECK-LABEL: f14:
; CHECK: aghi %r3, 1
; CHECK: stg %r3, 0(%r4)
; CHECK: {{jgo foo@PLT|bnor %r14}}
@ -190,8 +241,8 @@ exit:
}
; ... and the same with the inverted direction.
define void @f12(i64 %dummy, i64 %a, i64 *%res) {
; CHECK-LABEL: f12:
define void @f15(i64 %dummy, i64 %a, i64 *%res) {
; CHECK-LABEL: f15:
; CHECK: aghi %r3, 1
; CHECK: stg %r3, 0(%r4)
; CHECK: {{jgno foo@PLT|bor %r14}}