From 47cfb1bca2f7049c57d0b5a9992db3d7a5dd93ae Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Wed, 3 Apr 2019 15:09:19 +0000 Subject: [PATCH] [SystemZ] Improve codegen for certain SADDO-immediate cases When performing an add-with-overflow with an immediate in the range -2G ... -4G, code currently loads the immediate into a register, which generally takes two instructions. In this particular case, it is preferable to load the negated immediate into a register instead, which always only requires one instruction, and then perform a subtract. llvm-svn: 357597 --- lib/Target/SystemZ/SystemZInstrInfo.td | 8 +++ lib/Target/SystemZ/SystemZOperands.td | 20 ++++++++ test/CodeGen/SystemZ/int-sadd-07.ll | 67 +++++++++++++++++++++++--- 3 files changed, 87 insertions(+), 8 deletions(-) diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index 10081262389..edef54eef01 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -1029,6 +1029,14 @@ let AddedComplexity = 1 in { (AGFI GR64:$src1, imm64sx32n:$src2)>; } +// And vice versa in one special case, where we need to load a +// constant into a register in any case, but the negated constant +// requires fewer instructions to load. +def : Pat<(z_saddo GR64:$src1, imm64lh16n:$src2), + (SGR GR64:$src1, (LLILH imm64lh16n:$src2))>; +def : Pat<(z_saddo GR64:$src1, imm64lf32n:$src2), + (SGR GR64:$src1, (LLILF imm64lf32n:$src2))>; + // Subtraction producing a carry. let Defs = [CC] in { // Subtraction of a register. diff --git a/lib/Target/SystemZ/SystemZOperands.td b/lib/Target/SystemZ/SystemZOperands.td index 4d0c5d02ffa..56632e1529a 100644 --- a/lib/Target/SystemZ/SystemZOperands.td +++ b/lib/Target/SystemZ/SystemZOperands.td @@ -188,6 +188,17 @@ def HF32 : SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i64); }]>; +// Negated variants. 
+def NEGLH16 : SDNodeXForm<imm, [{ + uint64_t Value = (-N->getZExtValue() & 0x00000000FFFF0000ULL) >> 16; + return CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i64); +}]>; + +def NEGLF32 : SDNodeXForm<imm, [{ + uint64_t Value = -N->getZExtValue() & 0x00000000FFFFFFFFULL; + return CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i64); +}]>; + // Truncate an immediate to a 8-bit signed quantity. def SIMM8 : SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(int8_t(N->getZExtValue()), SDLoc(N), @@ -430,6 +441,15 @@ def imm64hf32c : Immediate<i64, [{ return SystemZ::isImmHF(uint64_t(~N->getZExtValue())); }], HF32, "U32Imm">; +// Negated immediates that fit LF32 or LH16. +def imm64lh16n : Immediate<i64, [{ + return SystemZ::isImmLH(uint64_t(-N->getZExtValue())); +}], NEGLH16, "U16Imm">; + +def imm64lf32n : Immediate<i64, [{ + return SystemZ::isImmLF(uint64_t(-N->getZExtValue())); +}], NEGLF32, "U32Imm">; + // Short immediates. def imm64sx8 : Immediate<i64, [{ return isInt<8>(N->getSExtValue()); diff --git a/test/CodeGen/SystemZ/int-sadd-07.ll b/test/CodeGen/SystemZ/int-sadd-07.ll index d800eb00c91..0adbe466f3a 100644 --- a/test/CodeGen/SystemZ/int-sadd-07.ll +++ b/test/CodeGen/SystemZ/int-sadd-07.ll @@ -151,12 +151,12 @@ define zeroext i1 @f9(i64 %dummy, i64 %a, i64 *%res) { ret i1 %obit } -; Check the next value down, which must use register addition instead. +; Check the next value down, which can use register subtraction instead. define zeroext i1 @f10(i64 %dummy, i64 %a, i64 *%res) { ; CHECK-LABEL: f10: -; CHECK: llihf [[REG1:%r[0-9]+]], 4294967295 -; CHECK: agr [[REG1]], %r3 -; CHECK-DAG: stg [[REG1]], 0(%r4) +; CHECK: llilf [[REG1:%r[0-9]+]], 2147483649 +; CHECK: sgr %r3, [[REG1]] +; CHECK-DAG: stg %r3, 0(%r4) ; CHECK-DAG: ipm [[REG:%r[0-5]]] ; CHECK-DAG: afi [[REG]], 1342177280 ; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 @@ -168,9 +168,60 @@ define zeroext i1 @f10(i64 %dummy, i64 %a, i64 *%res) { ret i1 %obit } -; Check using the overflow result for a branch. -define void @f11(i64 %dummy, i64 %a, i64 *%res) { +; We may be able to use LLILH instead of LLILF. 
+define zeroext i1 @f11(i64 %dummy, i64 %a, i64 *%res) { ; CHECK-LABEL: f11: +; CHECK: llilh [[REG1:%r[0-9]+]], 32769 +; CHECK: sgr %r3, [[REG1]] +; CHECK-DAG: stg %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 -2147549184) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check low end of the LLILF/SGR range. +define zeroext i1 @f12(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f12: +; CHECK: llilf [[REG1:%r[0-9]+]], 4294967295 +; CHECK: sgr %r3, [[REG1]] +; CHECK-DAG: stg %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 -4294967295) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the next value down, which must use register addition instead. +define zeroext i1 @f13(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f13: +; CHECK: llihf [[REG1:%r[0-9]+]], 4294967295 +; CHECK: agr [[REG1]], %r3 +; CHECK-DAG: stg [[REG1]], 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 -4294967296) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check using the overflow result for a branch. +define void @f14(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f14: ; CHECK: aghi %r3, 1 ; CHECK: stg %r3, 0(%r4) ; CHECK: {{jgo foo@PLT|bnor %r14}} @@ -190,8 +241,8 @@ exit: } ; ... and the same with the inverted direction. 
-define void @f12(i64 %dummy, i64 %a, i64 *%res) { -; CHECK-LABEL: f12: +define void @f15(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f15: ; CHECK: aghi %r3, 1 ; CHECK: stg %r3, 0(%r4) ; CHECK: {{jgno foo@PLT|bor %r14}}