mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 12:41:49 +01:00
[SystemZ] Improve codegen for certain SADDO-immediate cases
When performing an add-with-overflow with an immediate in the range -2G ... -4G, the generated code currently loads the immediate into a register, which generally takes two instructions. In this particular case, it is preferable to load the negated immediate into a register instead, which always requires only one instruction, and then perform a subtract.
llvm-svn: 357597
This commit is contained in:
parent
60c8d3580e
commit
47cfb1bca2
@ -1029,6 +1029,14 @@ let AddedComplexity = 1 in {
|
||||
(AGFI GR64:$src1, imm64sx32n:$src2)>;
|
||||
}
|
||||
|
||||
// And vice versa in one special case, where we need to load a
|
||||
// constant into a register in any case, but the negated constant
|
||||
// requires fewer instructions to load.
|
||||
def : Pat<(z_saddo GR64:$src1, imm64lh16n:$src2),
|
||||
(SGR GR64:$src1, (LLILH imm64lh16n:$src2))>;
|
||||
def : Pat<(z_saddo GR64:$src1, imm64lf32n:$src2),
|
||||
(SGR GR64:$src1, (LLILF imm64lf32n:$src2))>;
|
||||
|
||||
// Subtraction producing a carry.
|
||||
let Defs = [CC] in {
|
||||
// Subtraction of a register.
|
||||
|
@ -188,6 +188,17 @@ def HF32 : SDNodeXForm<imm, [{
|
||||
return CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i64);
|
||||
}]>;
|
||||
|
||||
// Negated variants.
|
||||
def NEGLH16 : SDNodeXForm<imm, [{
|
||||
uint64_t Value = (-N->getZExtValue() & 0x00000000FFFF0000ULL) >> 16;
|
||||
return CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i64);
|
||||
}]>;
|
||||
|
||||
def NEGLF32 : SDNodeXForm<imm, [{
|
||||
uint64_t Value = -N->getZExtValue() & 0x00000000FFFFFFFFULL;
|
||||
return CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i64);
|
||||
}]>;
|
||||
|
||||
// Truncate an immediate to an 8-bit signed quantity.
|
||||
def SIMM8 : SDNodeXForm<imm, [{
|
||||
return CurDAG->getTargetConstant(int8_t(N->getZExtValue()), SDLoc(N),
|
||||
@ -430,6 +441,15 @@ def imm64hf32c : Immediate<i64, [{
|
||||
return SystemZ::isImmHF(uint64_t(~N->getZExtValue()));
|
||||
}], HF32, "U32Imm">;
|
||||
|
||||
// Immediates whose negation fits LF32 or LH16.
|
||||
def imm64lh16n : Immediate<i64, [{
|
||||
return SystemZ::isImmLH(uint64_t(-N->getZExtValue()));
|
||||
}], NEGLH16, "U16Imm">;
|
||||
|
||||
def imm64lf32n : Immediate<i64, [{
|
||||
return SystemZ::isImmLF(uint64_t(-N->getZExtValue()));
|
||||
}], NEGLF32, "U32Imm">;
|
||||
|
||||
// Short immediates.
|
||||
def imm64sx8 : Immediate<i64, [{
|
||||
return isInt<8>(N->getSExtValue());
|
||||
|
@ -151,12 +151,12 @@ define zeroext i1 @f9(i64 %dummy, i64 %a, i64 *%res) {
|
||||
ret i1 %obit
|
||||
}
|
||||
|
||||
; Check the next value down, which must use register addition instead.
|
||||
; Check the next value down, which can use register subtraction instead.
|
||||
define zeroext i1 @f10(i64 %dummy, i64 %a, i64 *%res) {
|
||||
; CHECK-LABEL: f10:
|
||||
; CHECK: llihf [[REG1:%r[0-9]+]], 4294967295
|
||||
; CHECK: agr [[REG1]], %r3
|
||||
; CHECK-DAG: stg [[REG1]], 0(%r4)
|
||||
; CHECK: llilf [[REG1:%r[0-9]+]], 2147483649
|
||||
; CHECK: sgr %r3, [[REG1]]
|
||||
; CHECK-DAG: stg %r3, 0(%r4)
|
||||
; CHECK-DAG: ipm [[REG:%r[0-5]]]
|
||||
; CHECK-DAG: afi [[REG]], 1342177280
|
||||
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
|
||||
@ -168,9 +168,60 @@ define zeroext i1 @f10(i64 %dummy, i64 %a, i64 *%res) {
|
||||
ret i1 %obit
|
||||
}
|
||||
|
||||
; Check using the overflow result for a branch.
|
||||
define void @f11(i64 %dummy, i64 %a, i64 *%res) {
|
||||
; We may be able to use LLILH instead of LLILF.
|
||||
define zeroext i1 @f11(i64 %dummy, i64 %a, i64 *%res) {
|
||||
; CHECK-LABEL: f11:
|
||||
; CHECK: llilh [[REG1:%r[0-9]+]], 32769
|
||||
; CHECK: sgr %r3, [[REG1]]
|
||||
; CHECK-DAG: stg %r3, 0(%r4)
|
||||
; CHECK-DAG: ipm [[REG:%r[0-5]]]
|
||||
; CHECK-DAG: afi [[REG]], 1342177280
|
||||
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
|
||||
; CHECK: br %r14
|
||||
%t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 -2147549184)
|
||||
%val = extractvalue {i64, i1} %t, 0
|
||||
%obit = extractvalue {i64, i1} %t, 1
|
||||
store i64 %val, i64 *%res
|
||||
ret i1 %obit
|
||||
}
|
||||
|
||||
; Check the low end of the LLILF/SGR range.
|
||||
define zeroext i1 @f12(i64 %dummy, i64 %a, i64 *%res) {
|
||||
; CHECK-LABEL: f12:
|
||||
; CHECK: llilf [[REG1:%r[0-9]+]], 4294967295
|
||||
; CHECK: sgr %r3, [[REG1]]
|
||||
; CHECK-DAG: stg %r3, 0(%r4)
|
||||
; CHECK-DAG: ipm [[REG:%r[0-5]]]
|
||||
; CHECK-DAG: afi [[REG]], 1342177280
|
||||
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
|
||||
; CHECK: br %r14
|
||||
%t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 -4294967295)
|
||||
%val = extractvalue {i64, i1} %t, 0
|
||||
%obit = extractvalue {i64, i1} %t, 1
|
||||
store i64 %val, i64 *%res
|
||||
ret i1 %obit
|
||||
}
|
||||
|
||||
; Check the next value down, which must use register addition instead.
|
||||
define zeroext i1 @f13(i64 %dummy, i64 %a, i64 *%res) {
|
||||
; CHECK-LABEL: f13:
|
||||
; CHECK: llihf [[REG1:%r[0-9]+]], 4294967295
|
||||
; CHECK: agr [[REG1]], %r3
|
||||
; CHECK-DAG: stg [[REG1]], 0(%r4)
|
||||
; CHECK-DAG: ipm [[REG:%r[0-5]]]
|
||||
; CHECK-DAG: afi [[REG]], 1342177280
|
||||
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
|
||||
; CHECK: br %r14
|
||||
%t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 -4294967296)
|
||||
%val = extractvalue {i64, i1} %t, 0
|
||||
%obit = extractvalue {i64, i1} %t, 1
|
||||
store i64 %val, i64 *%res
|
||||
ret i1 %obit
|
||||
}
|
||||
|
||||
; Check using the overflow result for a branch.
|
||||
define void @f14(i64 %dummy, i64 %a, i64 *%res) {
|
||||
; CHECK-LABEL: f14:
|
||||
; CHECK: aghi %r3, 1
|
||||
; CHECK: stg %r3, 0(%r4)
|
||||
; CHECK: {{jgo foo@PLT|bnor %r14}}
|
||||
@ -190,8 +241,8 @@ exit:
|
||||
}
|
||||
|
||||
; ... and the same with the inverted direction.
|
||||
define void @f12(i64 %dummy, i64 %a, i64 *%res) {
|
||||
; CHECK-LABEL: f12:
|
||||
define void @f15(i64 %dummy, i64 %a, i64 *%res) {
|
||||
; CHECK-LABEL: f15:
|
||||
; CHECK: aghi %r3, 1
|
||||
; CHECK: stg %r3, 0(%r4)
|
||||
; CHECK: {{jgno foo@PLT|bor %r14}}
|
||||
|
Loading…
x
Reference in New Issue
Block a user