mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 19:23:23 +01:00
[SystemZ] Optimize floating-point comparisons with zero
This follows the same lines as the integer code. In the end it seemed easier to have a second 4-bit mask in TSFlags to specify the compare-like CC values. That eats one more TSFlags bit than adding a CCHasUnordered would have done, but it feels more concise. llvm-svn: 187883
This commit is contained in:
parent
5960348422
commit
b6323e0b21
@ -122,6 +122,12 @@ static bool resultTests(MachineInstr *MI, unsigned Reg, unsigned SubReg) {
|
||||
case SystemZ::LTR:
|
||||
case SystemZ::LTGR:
|
||||
case SystemZ::LTGFR:
|
||||
case SystemZ::LER:
|
||||
case SystemZ::LDR:
|
||||
case SystemZ::LXR:
|
||||
case SystemZ::LTEBR:
|
||||
case SystemZ::LTDBR:
|
||||
case SystemZ::LTXBR:
|
||||
if (MI->getOperand(1).getReg() == Reg &&
|
||||
MI->getOperand(1).getSubReg() == SubReg)
|
||||
return true;
|
||||
@ -230,15 +236,12 @@ adjustCCMasksForInstr(MachineInstr *MI, MachineInstr *Compare,
|
||||
unsigned MIFlags = Desc.TSFlags;
|
||||
|
||||
// See which compare-style condition codes are available.
|
||||
unsigned ReusableCCMask = 0;
|
||||
if (MIFlags & SystemZII::CCHasZero)
|
||||
ReusableCCMask |= SystemZ::CCMASK_CMP_EQ;
|
||||
unsigned ReusableCCMask = SystemZII::getCompareZeroCCMask(MIFlags);
|
||||
|
||||
// For unsigned comparisons with zero, only equality makes sense.
|
||||
unsigned CompareFlags = Compare->getDesc().TSFlags;
|
||||
if (!(CompareFlags & SystemZII::IsLogical) &&
|
||||
(MIFlags & SystemZII::CCHasOrder))
|
||||
ReusableCCMask |= SystemZ::CCMASK_CMP_LT | SystemZ::CCMASK_CMP_GT;
|
||||
if (CompareFlags & SystemZII::IsLogical)
|
||||
ReusableCCMask &= SystemZ::CCMASK_CMP_EQ;
|
||||
|
||||
if (ReusableCCMask == 0)
|
||||
return false;
|
||||
@ -297,6 +300,21 @@ adjustCCMasksForInstr(MachineInstr *MI, MachineInstr *Compare,
|
||||
return true;
|
||||
}
|
||||
|
||||
// Return true if Compare is a comparison against zero.
|
||||
static bool isCompareZero(MachineInstr *Compare) {
|
||||
switch (Compare->getOpcode()) {
|
||||
case SystemZ::LTEBRCompare:
|
||||
case SystemZ::LTDBRCompare:
|
||||
case SystemZ::LTXBRCompare:
|
||||
return true;
|
||||
|
||||
default:
|
||||
return (Compare->getNumExplicitOperands() == 2 &&
|
||||
Compare->getOperand(1).isImm() &&
|
||||
Compare->getOperand(1).getImm() == 0);
|
||||
}
|
||||
}
|
||||
|
||||
// Try to optimize cases where comparison instruction Compare is testing
|
||||
// a value against zero. Return true on success and if Compare should be
|
||||
// deleted as dead. CCUsers is the list of instructions that use the CC
|
||||
@ -304,10 +322,7 @@ adjustCCMasksForInstr(MachineInstr *MI, MachineInstr *Compare,
|
||||
bool SystemZElimCompare::
|
||||
optimizeCompareZero(MachineInstr *Compare,
|
||||
SmallVectorImpl<MachineInstr *> &CCUsers) {
|
||||
// Check whether this is a comparison against zero.
|
||||
if (Compare->getNumExplicitOperands() != 2 ||
|
||||
!Compare->getOperand(1).isImm() ||
|
||||
Compare->getOperand(1).getImm() != 0)
|
||||
if (!isCompareZero(Compare))
|
||||
return false;
|
||||
|
||||
// Search back for CC results that are based on the first operand.
|
||||
|
@ -41,7 +41,7 @@ let neverHasSideEffects = 1 in {
|
||||
|
||||
// Moves between two floating-point registers that also set the condition
|
||||
// codes.
|
||||
let Defs = [CC] in {
|
||||
let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
|
||||
defm LTEBR : LoadAndTestRRE<"lteb", 0xB302, FP32>;
|
||||
defm LTDBR : LoadAndTestRRE<"ltdb", 0xB312, FP64>;
|
||||
defm LTXBR : LoadAndTestRRE<"ltxb", 0xB342, FP128>;
|
||||
@ -149,15 +149,13 @@ def LXEB : UnaryRXE<"lxeb", 0xED06, extloadf32, FP128, 4>;
|
||||
def LXDB : UnaryRXE<"lxdb", 0xED05, extloadf64, FP128, 8>;
|
||||
|
||||
// Convert a signed integer register value to a floating-point one.
|
||||
let Defs = [CC] in {
|
||||
def CEFBR : UnaryRRE<"cefb", 0xB394, sint_to_fp, FP32, GR32>;
|
||||
def CDFBR : UnaryRRE<"cdfb", 0xB395, sint_to_fp, FP64, GR32>;
|
||||
def CXFBR : UnaryRRE<"cxfb", 0xB396, sint_to_fp, FP128, GR32>;
|
||||
def CEFBR : UnaryRRE<"cefb", 0xB394, sint_to_fp, FP32, GR32>;
|
||||
def CDFBR : UnaryRRE<"cdfb", 0xB395, sint_to_fp, FP64, GR32>;
|
||||
def CXFBR : UnaryRRE<"cxfb", 0xB396, sint_to_fp, FP128, GR32>;
|
||||
|
||||
def CEGBR : UnaryRRE<"cegb", 0xB3A4, sint_to_fp, FP32, GR64>;
|
||||
def CDGBR : UnaryRRE<"cdgb", 0xB3A5, sint_to_fp, FP64, GR64>;
|
||||
def CXGBR : UnaryRRE<"cxgb", 0xB3A6, sint_to_fp, FP128, GR64>;
|
||||
}
|
||||
def CEGBR : UnaryRRE<"cegb", 0xB3A4, sint_to_fp, FP32, GR64>;
|
||||
def CDGBR : UnaryRRE<"cdgb", 0xB3A5, sint_to_fp, FP64, GR64>;
|
||||
def CXGBR : UnaryRRE<"cxgb", 0xB3A6, sint_to_fp, FP128, GR64>;
|
||||
|
||||
// Convert a floating-point register value to a signed integer value,
|
||||
// with the second operand (modifier M3) specifying the rounding mode.
|
||||
@ -185,21 +183,21 @@ def : Pat<(i64 (fp_to_sint FP128:$src)), (CGXBR 5, FP128:$src)>;
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Negation (Load Complement).
|
||||
let Defs = [CC] in {
|
||||
let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
|
||||
def LCEBR : UnaryRRE<"lceb", 0xB303, fneg, FP32, FP32>;
|
||||
def LCDBR : UnaryRRE<"lcdb", 0xB313, fneg, FP64, FP64>;
|
||||
def LCXBR : UnaryRRE<"lcxb", 0xB343, fneg, FP128, FP128>;
|
||||
}
|
||||
|
||||
// Absolute value (Load Positive).
|
||||
let Defs = [CC] in {
|
||||
let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
|
||||
def LPEBR : UnaryRRE<"lpeb", 0xB300, fabs, FP32, FP32>;
|
||||
def LPDBR : UnaryRRE<"lpdb", 0xB310, fabs, FP64, FP64>;
|
||||
def LPXBR : UnaryRRE<"lpxb", 0xB340, fabs, FP128, FP128>;
|
||||
}
|
||||
|
||||
// Negative absolute value (Load Negative).
|
||||
let Defs = [CC] in {
|
||||
let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
|
||||
def LNEBR : UnaryRRE<"lneb", 0xB301, fnabs, FP32, FP32>;
|
||||
def LNDBR : UnaryRRE<"lndb", 0xB311, fnabs, FP64, FP64>;
|
||||
def LNXBR : UnaryRRE<"lnxb", 0xB341, fnabs, FP128, FP128>;
|
||||
@ -219,11 +217,9 @@ def SQDB : UnaryRXE<"sqdb", 0xED15, loadu<fsqrt>, FP64, 8>;
|
||||
// These forms always check for inexact conditions. z196 added versions
|
||||
// that allow this to be suppressed (as for fnearbyint), but we don't yet
|
||||
// support -march=z196.
|
||||
let Defs = [CC] in {
|
||||
def FIEBR : UnaryRRF<"fieb", 0xB357, FP32, FP32>;
|
||||
def FIDBR : UnaryRRF<"fidb", 0xB35F, FP64, FP64>;
|
||||
def FIXBR : UnaryRRF<"fixb", 0xB347, FP128, FP128>;
|
||||
}
|
||||
def FIEBR : UnaryRRF<"fieb", 0xB357, FP32, FP32>;
|
||||
def FIDBR : UnaryRRF<"fidb", 0xB35F, FP64, FP64>;
|
||||
def FIXBR : UnaryRRF<"fixb", 0xB347, FP128, FP128>;
|
||||
|
||||
// frint rounds according to the current mode (modifier 0) and detects
|
||||
// inexact conditions.
|
||||
@ -236,7 +232,7 @@ def : Pat<(frint FP128:$src), (FIXBR 0, FP128:$src)>;
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Addition.
|
||||
let Defs = [CC] in {
|
||||
let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
|
||||
let isCommutable = 1 in {
|
||||
def AEBR : BinaryRRE<"aeb", 0xB30A, fadd, FP32, FP32>;
|
||||
def ADBR : BinaryRRE<"adb", 0xB31A, fadd, FP64, FP64>;
|
||||
@ -247,7 +243,7 @@ let Defs = [CC] in {
|
||||
}
|
||||
|
||||
// Subtraction.
|
||||
let Defs = [CC] in {
|
||||
let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
|
||||
def SEBR : BinaryRRE<"seb", 0xB30B, fsub, FP32, FP32>;
|
||||
def SDBR : BinaryRRE<"sdb", 0xB31B, fsub, FP64, FP64>;
|
||||
def SXBR : BinaryRRE<"sxb", 0xB34B, fsub, FP128, FP128>;
|
||||
@ -317,7 +313,7 @@ def DDB : BinaryRXE<"ddb", 0xED1D, fdiv, FP64, load, 8>;
|
||||
// Comparisons
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
let Defs = [CC] in {
|
||||
let Defs = [CC], CCValues = 0xF in {
|
||||
def CEBR : CompareRRE<"ceb", 0xB309, z_cmp, FP32, FP32>;
|
||||
def CDBR : CompareRRE<"cdb", 0xB319, z_cmp, FP64, FP64>;
|
||||
def CXBR : CompareRRE<"cxb", 0xB349, z_cmp, FP128, FP128>;
|
||||
|
@ -66,12 +66,9 @@ class InstSystemZ<int size, dag outs, dag ins, string asmstr,
|
||||
// SystemZ::CCMASK_*.
|
||||
bits<4> CCValues = 0;
|
||||
|
||||
// True if the instruction sets CC to 0 when the result is 0.
|
||||
bit CCHasZero = 0;
|
||||
|
||||
// True if the instruction sets CC to 1 when the result is less than 0
|
||||
// and to 2 when the result is greater than 0.
|
||||
bit CCHasOrder = 0;
|
||||
// The subset of CCValues that have the same meaning as they would after
|
||||
// a comparison of the first operand against zero.
|
||||
bits<4> CompareZeroCCMask = 0;
|
||||
|
||||
// True if the instruction is conditional and if the CC mask operand
|
||||
// comes first (as for BRC, etc.).
|
||||
@ -91,11 +88,10 @@ class InstSystemZ<int size, dag outs, dag ins, string asmstr,
|
||||
let TSFlags{4} = Is128Bit;
|
||||
let TSFlags{9-5} = AccessBytes;
|
||||
let TSFlags{13-10} = CCValues;
|
||||
let TSFlags{14} = CCHasZero;
|
||||
let TSFlags{15} = CCHasOrder;
|
||||
let TSFlags{16} = CCMaskFirst;
|
||||
let TSFlags{17} = CCMaskLast;
|
||||
let TSFlags{18} = IsLogical;
|
||||
let TSFlags{17-14} = CompareZeroCCMask;
|
||||
let TSFlags{18} = CCMaskFirst;
|
||||
let TSFlags{19} = CCMaskLast;
|
||||
let TSFlags{20} = IsLogical;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -770,6 +770,9 @@ unsigned SystemZInstrInfo::getLoadAndTest(unsigned Opcode) const {
|
||||
case SystemZ::LR: return SystemZ::LTR;
|
||||
case SystemZ::LGFR: return SystemZ::LTGFR;
|
||||
case SystemZ::LGR: return SystemZ::LTGR;
|
||||
case SystemZ::LER: return SystemZ::LTEBR;
|
||||
case SystemZ::LDR: return SystemZ::LTDBR;
|
||||
case SystemZ::LXR: return SystemZ::LTXBR;
|
||||
default: return 0;
|
||||
}
|
||||
}
|
||||
|
@ -28,20 +28,20 @@ class SystemZTargetMachine;
|
||||
namespace SystemZII {
|
||||
enum {
|
||||
// See comments in SystemZInstrFormats.td.
|
||||
SimpleBDXLoad = (1 << 0),
|
||||
SimpleBDXStore = (1 << 1),
|
||||
Has20BitOffset = (1 << 2),
|
||||
HasIndex = (1 << 3),
|
||||
Is128Bit = (1 << 4),
|
||||
AccessSizeMask = (31 << 5),
|
||||
AccessSizeShift = 5,
|
||||
CCValuesMask = (15 << 10),
|
||||
CCValuesShift = 10,
|
||||
CCHasZero = (1 << 14),
|
||||
CCHasOrder = (1 << 15),
|
||||
CCMaskFirst = (1 << 16),
|
||||
CCMaskLast = (1 << 17),
|
||||
IsLogical = (1 << 18)
|
||||
SimpleBDXLoad = (1 << 0),
|
||||
SimpleBDXStore = (1 << 1),
|
||||
Has20BitOffset = (1 << 2),
|
||||
HasIndex = (1 << 3),
|
||||
Is128Bit = (1 << 4),
|
||||
AccessSizeMask = (31 << 5),
|
||||
AccessSizeShift = 5,
|
||||
CCValuesMask = (15 << 10),
|
||||
CCValuesShift = 10,
|
||||
CompareZeroCCMaskMask = (15 << 14),
|
||||
CompareZeroCCMaskShift = 14,
|
||||
CCMaskFirst = (1 << 18),
|
||||
CCMaskLast = (1 << 19),
|
||||
IsLogical = (1 << 20)
|
||||
};
|
||||
static inline unsigned getAccessSize(unsigned int Flags) {
|
||||
return (Flags & AccessSizeMask) >> AccessSizeShift;
|
||||
@ -49,6 +49,9 @@ namespace SystemZII {
|
||||
static inline unsigned getCCValues(unsigned int Flags) {
|
||||
return (Flags & CCValuesMask) >> CCValuesShift;
|
||||
}
|
||||
static inline unsigned getCompareZeroCCMask(unsigned int Flags) {
|
||||
return (Flags & CompareZeroCCMaskMask) >> CompareZeroCCMaskShift;
|
||||
}
|
||||
|
||||
// SystemZ MachineOperand target flags.
|
||||
enum {
|
||||
|
@ -230,7 +230,7 @@ let neverHasSideEffects = 1 in {
|
||||
def LR : UnaryRR <"l", 0x18, null_frag, GR32, GR32>;
|
||||
def LGR : UnaryRRE<"lg", 0xB904, null_frag, GR64, GR64>;
|
||||
}
|
||||
let Defs = [CC], CCValues = 0xE, CCHasZero = 1, CCHasOrder = 1 in {
|
||||
let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in {
|
||||
def LTR : UnaryRR <"lt", 0x12, null_frag, GR32, GR32>;
|
||||
def LTGR : UnaryRRE<"ltg", 0xB902, null_frag, GR64, GR64>;
|
||||
}
|
||||
@ -276,7 +276,7 @@ let canFoldAsLoad = 1, SimpleBDXLoad = 1 in {
|
||||
[(set GR128:$dst, (load bdxaddr20only128:$src))]>;
|
||||
}
|
||||
}
|
||||
let Defs = [CC], CCValues = 0xE, CCHasZero = 1, CCHasOrder = 1 in {
|
||||
let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in {
|
||||
def LT : UnaryRXY<"lt", 0xE312, load, GR32, 4>;
|
||||
def LTG : UnaryRXY<"ltg", 0xE302, load, GR64, 8>;
|
||||
}
|
||||
@ -374,7 +374,7 @@ let neverHasSideEffects = 1 in {
|
||||
def LGHR : UnaryRRE<"lgh", 0xB907, sext16, GR64, GR64>;
|
||||
def LGFR : UnaryRRE<"lgf", 0xB914, sext32, GR64, GR32>;
|
||||
}
|
||||
let Defs = [CC], CCValues = 0xE, CCHasZero = 1, CCHasOrder = 1 in
|
||||
let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in
|
||||
def LTGFR : UnaryRRE<"ltgf", 0xB912, null_frag, GR64, GR64>;
|
||||
|
||||
// Match 32-to-64-bit sign extensions in which the source is already
|
||||
@ -393,7 +393,7 @@ def LGH : UnaryRXY<"lgh", 0xE315, sextloadi16, GR64, 2>;
|
||||
def LGF : UnaryRXY<"lgf", 0xE314, sextloadi32, GR64, 4>;
|
||||
def LGHRL : UnaryRILPC<"lghrl", 0xC44, aligned_sextloadi16, GR64>;
|
||||
def LGFRL : UnaryRILPC<"lgfrl", 0xC4C, aligned_sextloadi32, GR64>;
|
||||
let Defs = [CC], CCValues = 0xE, CCHasZero = 1, CCHasOrder = 1 in
|
||||
let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in
|
||||
def LTGF : UnaryRXY<"ltgf", 0xE332, sextloadi32, GR64, 4>;
|
||||
|
||||
// If the sign of a load-extend operation doesn't matter, use the signed ones.
|
||||
@ -532,11 +532,11 @@ let neverHasSideEffects = 1, isAsCheapAsAMove = 1, isMoveImm = 1,
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
let Defs = [CC] in {
|
||||
let CCValues = 0xF, CCHasZero = 1 in {
|
||||
let CCValues = 0xF, CompareZeroCCMask = 0x8 in {
|
||||
def LCR : UnaryRR <"lc", 0x13, ineg, GR32, GR32>;
|
||||
def LCGR : UnaryRRE<"lcg", 0xB903, ineg, GR64, GR64>;
|
||||
}
|
||||
let CCValues = 0xE, CCHasZero = 1, CCHasOrder = 1 in
|
||||
let CCValues = 0xE, CompareZeroCCMask = 0xE in
|
||||
def LCGFR : UnaryRRE<"lcgf", 0xB913, null_frag, GR64, GR32>;
|
||||
}
|
||||
defm : SXU<ineg, LCGFR>;
|
||||
@ -589,7 +589,7 @@ def : Pat<(or (zext32 GR32:$src), imm64hf32:$imm),
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Plain addition.
|
||||
let Defs = [CC], CCValues = 0xF, CCHasZero = 1 in {
|
||||
let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0x8 in {
|
||||
// Addition of a register.
|
||||
let isCommutable = 1 in {
|
||||
defm AR : BinaryRRAndK<"a", 0x1A, 0xB9F8, add, GR32, GR32>;
|
||||
@ -660,7 +660,7 @@ let Defs = [CC], Uses = [CC] in {
|
||||
|
||||
// Plain subtraction. Although immediate forms exist, we use the
|
||||
// add-immediate instruction instead.
|
||||
let Defs = [CC], CCValues = 0xF, CCHasZero = 1 in {
|
||||
let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0x8 in {
|
||||
// Subtraction of a register.
|
||||
defm SR : BinaryRRAndK<"s", 0x1B, 0xB9F9, sub, GR32, GR32>;
|
||||
def SGFR : BinaryRRE<"sgf", 0xB919, null_frag, GR64, GR32>;
|
||||
@ -710,7 +710,7 @@ let Defs = [CC], Uses = [CC] in {
|
||||
|
||||
let Defs = [CC] in {
|
||||
// ANDs of a register.
|
||||
let isCommutable = 1, CCValues = 0xC, CCHasZero = 1 in {
|
||||
let isCommutable = 1, CCValues = 0xC, CompareZeroCCMask = 0x8 in {
|
||||
defm NR : BinaryRRAndK<"n", 0x14, 0xB9F4, and, GR32, GR32>;
|
||||
defm NGR : BinaryRREAndK<"ng", 0xB980, 0xB9E4, and, GR64, GR64>;
|
||||
}
|
||||
@ -730,14 +730,14 @@ let Defs = [CC] in {
|
||||
// ANDs of a 32-bit immediate, leaving other bits unaffected.
|
||||
// The CC result only reflects the 32-bit field, which means we can
|
||||
// use it as a zero indicator for i32 operations but not otherwise.
|
||||
let isCodeGenOnly = 1, CCValues = 0xC, CCHasZero = 1 in
|
||||
let isCodeGenOnly = 1, CCValues = 0xC, CompareZeroCCMask = 0x8 in
|
||||
def NILF32 : BinaryRIL<"nilf", 0xC0B, and, GR32, uimm32>;
|
||||
def NILF : BinaryRIL<"nilf", 0xC0B, and, GR64, imm64lf32c>;
|
||||
def NIHF : BinaryRIL<"nihf", 0xC0A, and, GR64, imm64hf32c>;
|
||||
}
|
||||
|
||||
// ANDs of memory.
|
||||
let CCValues = 0xC, CCHasZero = 1 in {
|
||||
let CCValues = 0xC, CompareZeroCCMask = 0x8 in {
|
||||
defm N : BinaryRXPair<"n", 0x54, 0xE354, and, GR32, load, 4>;
|
||||
def NG : BinaryRXY<"ng", 0xE380, and, GR64, load, 8>;
|
||||
}
|
||||
@ -754,7 +754,7 @@ defm : RMWIByte<and, bdaddr20pair, NIY>;
|
||||
|
||||
let Defs = [CC] in {
|
||||
// ORs of a register.
|
||||
let isCommutable = 1, CCValues = 0xC, CCHasZero = 1 in {
|
||||
let isCommutable = 1, CCValues = 0xC, CompareZeroCCMask = 0x8 in {
|
||||
defm OR : BinaryRRAndK<"o", 0x16, 0xB9F6, or, GR32, GR32>;
|
||||
defm OGR : BinaryRREAndK<"og", 0xB981, 0xB9E6, or, GR64, GR64>;
|
||||
}
|
||||
@ -773,13 +773,13 @@ let Defs = [CC] in {
|
||||
// ORs of a 32-bit immediate, leaving other bits unaffected.
|
||||
// The CC result only reflects the 32-bit field, which means we can
|
||||
// use it as a zero indicator for i32 operations but not otherwise.
|
||||
let isCodeGenOnly = 1, CCValues = 0xC, CCHasZero = 1 in
|
||||
let isCodeGenOnly = 1, CCValues = 0xC, CompareZeroCCMask = 0x8 in
|
||||
def OILF32 : BinaryRIL<"oilf", 0xC0D, or, GR32, uimm32>;
|
||||
def OILF : BinaryRIL<"oilf", 0xC0D, or, GR64, imm64lf32>;
|
||||
def OIHF : BinaryRIL<"oihf", 0xC0C, or, GR64, imm64hf32>;
|
||||
|
||||
// ORs of memory.
|
||||
let CCValues = 0xC, CCHasZero = 1 in {
|
||||
let CCValues = 0xC, CompareZeroCCMask = 0x8 in {
|
||||
defm O : BinaryRXPair<"o", 0x56, 0xE356, or, GR32, load, 4>;
|
||||
def OG : BinaryRXY<"og", 0xE381, or, GR64, load, 8>;
|
||||
}
|
||||
@ -796,7 +796,7 @@ defm : RMWIByte<or, bdaddr20pair, OIY>;
|
||||
|
||||
let Defs = [CC] in {
|
||||
// XORs of a register.
|
||||
let isCommutable = 1, CCValues = 0xC, CCHasZero = 1 in {
|
||||
let isCommutable = 1, CCValues = 0xC, CompareZeroCCMask = 0x8 in {
|
||||
defm XR : BinaryRRAndK<"x", 0x17, 0xB9F7, xor, GR32, GR32>;
|
||||
defm XGR : BinaryRREAndK<"xg", 0xB982, 0xB9E7, xor, GR64, GR64>;
|
||||
}
|
||||
@ -804,13 +804,13 @@ let Defs = [CC] in {
|
||||
// XORs of a 32-bit immediate, leaving other bits unaffected.
|
||||
// The CC result only reflects the 32-bit field, which means we can
|
||||
// use it as a zero indicator for i32 operations but not otherwise.
|
||||
let isCodeGenOnly = 1, CCValues = 0xC, CCHasZero = 1 in
|
||||
let isCodeGenOnly = 1, CCValues = 0xC, CompareZeroCCMask = 0x8 in
|
||||
def XILF32 : BinaryRIL<"xilf", 0xC07, xor, GR32, uimm32>;
|
||||
def XILF : BinaryRIL<"xilf", 0xC07, xor, GR64, imm64lf32>;
|
||||
def XIHF : BinaryRIL<"xihf", 0xC06, xor, GR64, imm64hf32>;
|
||||
|
||||
// XORs of memory.
|
||||
let CCValues = 0xC, CCHasZero = 1 in {
|
||||
let CCValues = 0xC, CompareZeroCCMask = 0x8 in {
|
||||
defm X : BinaryRXPair<"x",0x57, 0xE357, xor, GR32, load, 4>;
|
||||
def XG : BinaryRXY<"xg", 0xE382, xor, GR64, load, 8>;
|
||||
}
|
||||
@ -886,7 +886,7 @@ let neverHasSideEffects = 1 in {
|
||||
}
|
||||
|
||||
// Arithmetic shift right.
|
||||
let Defs = [CC], CCValues = 0xE, CCHasZero = 1, CCHasOrder = 1 in {
|
||||
let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in {
|
||||
defm SRA : ShiftRSAndK<"sra", 0x8A, 0xEBDC, sra, GR32>;
|
||||
def SRAG : ShiftRSY<"srag", 0xEB0A, sra, GR64>;
|
||||
}
|
||||
@ -903,7 +903,7 @@ let neverHasSideEffects = 1 in {
|
||||
let Defs = [CC] in {
|
||||
let isCodeGenOnly = 1 in
|
||||
def RISBG32 : RotateSelectRIEf<"risbg", 0xEC55, GR32, GR32>;
|
||||
let CCValues = 0xE, CCHasZero = 1, CCHasOrder = 1 in
|
||||
let CCValues = 0xE, CompareZeroCCMask = 0xE in
|
||||
def RISBG : RotateSelectRIEf<"risbg", 0xEC55, GR64, GR64>;
|
||||
}
|
||||
|
||||
|
348
test/CodeGen/SystemZ/fp-cmp-04.ll
Normal file
348
test/CodeGen/SystemZ/fp-cmp-04.ll
Normal file
@ -0,0 +1,348 @@
|
||||
; Test that floating-point compares are omitted if CC already has the
|
||||
; right value.
|
||||
;
|
||||
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
|
||||
|
||||
declare float @llvm.fabs.f32(float %f)
|
||||
|
||||
; Test addition followed by EQ, which can use the CC result of the addition.
|
||||
define float @f1(float %a, float %b, float *%dest) {
|
||||
; CHECK-LABEL: f1:
|
||||
; CHECK: aebr %f0, %f2
|
||||
; CHECK-NEXT: je .L{{.*}}
|
||||
; CHECK: br %r14
|
||||
entry:
|
||||
%res = fadd float %a, %b
|
||||
%cmp = fcmp oeq float %res, 0.0
|
||||
br i1 %cmp, label %exit, label %store
|
||||
|
||||
store:
|
||||
store float %b, float *%dest
|
||||
br label %exit
|
||||
|
||||
exit:
|
||||
ret float %res
|
||||
}
|
||||
|
||||
; ...and again with LT.
|
||||
define float @f2(float %a, float %b, float *%dest) {
|
||||
; CHECK-LABEL: f2:
|
||||
; CHECK: aebr %f0, %f2
|
||||
; CHECK-NEXT: jl .L{{.*}}
|
||||
; CHECK: br %r14
|
||||
entry:
|
||||
%res = fadd float %a, %b
|
||||
%cmp = fcmp olt float %res, 0.0
|
||||
br i1 %cmp, label %exit, label %store
|
||||
|
||||
store:
|
||||
store float %b, float *%dest
|
||||
br label %exit
|
||||
|
||||
exit:
|
||||
ret float %res
|
||||
}
|
||||
|
||||
; ...and again with GT.
|
||||
define float @f3(float %a, float %b, float *%dest) {
|
||||
; CHECK-LABEL: f3:
|
||||
; CHECK: aebr %f0, %f2
|
||||
; CHECK-NEXT: jh .L{{.*}}
|
||||
; CHECK: br %r14
|
||||
entry:
|
||||
%res = fadd float %a, %b
|
||||
%cmp = fcmp ogt float %res, 0.0
|
||||
br i1 %cmp, label %exit, label %store
|
||||
|
||||
store:
|
||||
store float %b, float *%dest
|
||||
br label %exit
|
||||
|
||||
exit:
|
||||
ret float %res
|
||||
}
|
||||
|
||||
; ...and again with UEQ.
|
||||
define float @f4(float %a, float %b, float *%dest) {
|
||||
; CHECK-LABEL: f4:
|
||||
; CHECK: aebr %f0, %f2
|
||||
; CHECK-NEXT: jnlh .L{{.*}}
|
||||
; CHECK: br %r14
|
||||
entry:
|
||||
%res = fadd float %a, %b
|
||||
%cmp = fcmp ueq float %res, 0.0
|
||||
br i1 %cmp, label %exit, label %store
|
||||
|
||||
store:
|
||||
store float %b, float *%dest
|
||||
br label %exit
|
||||
|
||||
exit:
|
||||
ret float %res
|
||||
}
|
||||
|
||||
; Subtraction also provides a zero-based CC value.
|
||||
define float @f5(float %a, float %b, float *%dest) {
|
||||
; CHECK-LABEL: f5:
|
||||
; CHECK: seb %f0, 0(%r2)
|
||||
; CHECK-NEXT: jnhe .L{{.*}}
|
||||
; CHECK: br %r14
|
||||
entry:
|
||||
%cur = load float *%dest
|
||||
%res = fsub float %a, %cur
|
||||
%cmp = fcmp ult float %res, 0.0
|
||||
br i1 %cmp, label %exit, label %store
|
||||
|
||||
store:
|
||||
store float %b, float *%dest
|
||||
br label %exit
|
||||
|
||||
exit:
|
||||
ret float %res
|
||||
}
|
||||
|
||||
; Test the result of LOAD POSITIVE.
|
||||
define float @f6(float %dummy, float %a, float *%dest) {
|
||||
; CHECK-LABEL: f6:
|
||||
; CHECK: lpebr %f0, %f2
|
||||
; CHECK-NEXT: jh .L{{.*}}
|
||||
; CHECK: br %r14
|
||||
entry:
|
||||
%res = call float @llvm.fabs.f32(float %a)
|
||||
%cmp = fcmp ogt float %res, 0.0
|
||||
br i1 %cmp, label %exit, label %store
|
||||
|
||||
store:
|
||||
store float %res, float *%dest
|
||||
br label %exit
|
||||
|
||||
exit:
|
||||
ret float %res
|
||||
}
|
||||
|
||||
; Test the result of LOAD NEGATIVE.
|
||||
define float @f7(float %dummy, float %a, float *%dest) {
|
||||
; CHECK-LABEL: f7:
|
||||
; CHECK: lnebr %f0, %f2
|
||||
; CHECK-NEXT: jl .L{{.*}}
|
||||
; CHECK: br %r14
|
||||
entry:
|
||||
%abs = call float @llvm.fabs.f32(float %a)
|
||||
%res = fsub float -0.0, %abs
|
||||
%cmp = fcmp olt float %res, 0.0
|
||||
br i1 %cmp, label %exit, label %store
|
||||
|
||||
store:
|
||||
store float %res, float *%dest
|
||||
br label %exit
|
||||
|
||||
exit:
|
||||
ret float %res
|
||||
}
|
||||
|
||||
; Test the result of LOAD COMPLEMENT.
|
||||
define float @f8(float %dummy, float %a, float *%dest) {
|
||||
; CHECK-LABEL: f8:
|
||||
; CHECK: lcebr %f0, %f2
|
||||
; CHECK-NEXT: jle .L{{.*}}
|
||||
; CHECK: br %r14
|
||||
entry:
|
||||
%res = fsub float -0.0, %a
|
||||
%cmp = fcmp ole float %res, 0.0
|
||||
br i1 %cmp, label %exit, label %store
|
||||
|
||||
store:
|
||||
store float %res, float *%dest
|
||||
br label %exit
|
||||
|
||||
exit:
|
||||
ret float %res
|
||||
}
|
||||
|
||||
; Multiplication (for example) does not modify CC.
|
||||
define float @f9(float %a, float %b, float *%dest) {
|
||||
; CHECK-LABEL: f9:
|
||||
; CHECK: meebr %f0, %f2
|
||||
; CHECK-NEXT: ltebr %f0, %f0
|
||||
; CHECK-NEXT: jlh .L{{.*}}
|
||||
; CHECK: br %r14
|
||||
entry:
|
||||
%res = fmul float %a, %b
|
||||
%cmp = fcmp one float %res, 0.0
|
||||
br i1 %cmp, label %exit, label %store
|
||||
|
||||
store:
|
||||
store float %b, float *%dest
|
||||
br label %exit
|
||||
|
||||
exit:
|
||||
ret float %res
|
||||
}
|
||||
|
||||
; Test a combination involving a CC-setting instruction followed by
|
||||
; a non-CC-setting instruction.
|
||||
define float @f10(float %a, float %b, float %c, float *%dest) {
|
||||
; CHECK-LABEL: f10:
|
||||
; CHECK: aebr %f0, %f2
|
||||
; CHECK-NEXT: debr %f0, %f4
|
||||
; CHECK-NEXT: ltebr %f0, %f0
|
||||
; CHECK-NEXT: jne .L{{.*}}
|
||||
; CHECK: br %r14
|
||||
entry:
|
||||
%add = fadd float %a, %b
|
||||
%res = fdiv float %add, %c
|
||||
%cmp = fcmp une float %res, 0.0
|
||||
br i1 %cmp, label %exit, label %store
|
||||
|
||||
store:
|
||||
store float %b, float *%dest
|
||||
br label %exit
|
||||
|
||||
exit:
|
||||
ret float %res
|
||||
}
|
||||
|
||||
; Test a case where CC is set based on a different register from the
|
||||
; compare input.
|
||||
define float @f11(float %a, float %b, float %c, float *%dest1, float *%dest2) {
|
||||
; CHECK-LABEL: f11:
|
||||
; CHECK: aebr %f0, %f2
|
||||
; CHECK-NEXT: sebr %f4, %f0
|
||||
; CHECK-NEXT: ste %f4, 0(%r2)
|
||||
; CHECK-NEXT: ltebr %f0, %f0
|
||||
; CHECK-NEXT: je .L{{.*}}
|
||||
; CHECK: br %r14
|
||||
entry:
|
||||
%add = fadd float %a, %b
|
||||
%sub = fsub float %c, %add
|
||||
store float %sub, float *%dest1
|
||||
%cmp = fcmp oeq float %add, 0.0
|
||||
br i1 %cmp, label %exit, label %store
|
||||
|
||||
store:
|
||||
store float %sub, float *%dest2
|
||||
br label %exit
|
||||
|
||||
exit:
|
||||
ret float %add
|
||||
}
|
||||
|
||||
; Test that LER gets converted to LTEBR where useful.
|
||||
define float @f12(float %dummy, float %val, float *%dest) {
|
||||
; CHECK-LABEL: f12:
|
||||
; CHECK: ltebr %f0, %f2
|
||||
; CHECK-NEXT: #APP
|
||||
; CHECK-NEXT: blah %f0
|
||||
; CHECK-NEXT: #NO_APP
|
||||
; CHECK-NEXT: jl .L{{.*}}
|
||||
; CHECK: br %r14
|
||||
entry:
|
||||
call void asm sideeffect "blah $0", "{f0}"(float %val)
|
||||
%cmp = fcmp olt float %val, 0.0
|
||||
br i1 %cmp, label %exit, label %store
|
||||
|
||||
store:
|
||||
store float %val, float *%dest
|
||||
br label %exit
|
||||
|
||||
exit:
|
||||
ret float %val
|
||||
}
|
||||
|
||||
; Test that LDR gets converted to LTDBR where useful.
|
||||
define double @f13(double %dummy, double %val, double *%dest) {
|
||||
; CHECK-LABEL: f13:
|
||||
; CHECK: ltdbr %f0, %f2
|
||||
; CHECK-NEXT: #APP
|
||||
; CHECK-NEXT: blah %f0
|
||||
; CHECK-NEXT: #NO_APP
|
||||
; CHECK-NEXT: jl .L{{.*}}
|
||||
; CHECK: br %r14
|
||||
entry:
|
||||
call void asm sideeffect "blah $0", "{f0}"(double %val)
|
||||
%cmp = fcmp olt double %val, 0.0
|
||||
br i1 %cmp, label %exit, label %store
|
||||
|
||||
store:
|
||||
store double %val, double *%dest
|
||||
br label %exit
|
||||
|
||||
exit:
|
||||
ret double %val
|
||||
}
|
||||
|
||||
; Test that LXR gets converted to LTXBR where useful.
|
||||
define void @f14(fp128 *%ptr1, fp128 *%ptr2) {
|
||||
; CHECK-LABEL: f14:
|
||||
; CHECK: ltxbr
|
||||
; CHECK-NEXT: dxbr
|
||||
; CHECK-NEXT: std
|
||||
; CHECK-NEXT: std
|
||||
; CHECK-NEXT: mxbr
|
||||
; CHECK-NEXT: std
|
||||
; CHECK-NEXT: std
|
||||
; CHECK-NEXT: jl .L{{.*}}
|
||||
; CHECK: br %r14
|
||||
entry:
|
||||
%val1 = load fp128 *%ptr1
|
||||
%val2 = load fp128 *%ptr2
|
||||
%div = fdiv fp128 %val1, %val2
|
||||
store fp128 %div, fp128 *%ptr1
|
||||
%mul = fmul fp128 %val1, %val2
|
||||
store fp128 %mul, fp128 *%ptr2
|
||||
%cmp = fcmp olt fp128 %val1, 0xL00000000000000000000000000000000
|
||||
br i1 %cmp, label %exit, label %store
|
||||
|
||||
store:
|
||||
call void asm sideeffect "blah", ""()
|
||||
br label %exit
|
||||
|
||||
exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
; Test a case where it is the source rather than destination of LER that
|
||||
; we need.
|
||||
define float @f15(float %val, float %dummy, float *%dest) {
|
||||
; CHECK-LABEL: f15:
|
||||
; CHECK: ltebr %f2, %f0
|
||||
; CHECK-NEXT: #APP
|
||||
; CHECK-NEXT: blah %f2
|
||||
; CHECK-NEXT: #NO_APP
|
||||
; CHECK-NEXT: jl .L{{.*}}
|
||||
; CHECK: br %r14
|
||||
entry:
|
||||
call void asm sideeffect "blah $0", "{f2}"(float %val)
|
||||
%cmp = fcmp olt float %val, 0.0
|
||||
br i1 %cmp, label %exit, label %store
|
||||
|
||||
store:
|
||||
store float %val, float *%dest
|
||||
br label %exit
|
||||
|
||||
exit:
|
||||
ret float %val
|
||||
}
|
||||
|
||||
; Test a case where it is the source rather than destination of LDR that
|
||||
; we need.
|
||||
define double @f16(double %val, double %dummy, double *%dest) {
|
||||
; CHECK-LABEL: f16:
|
||||
; CHECK: ltdbr %f2, %f0
|
||||
; CHECK-NEXT: #APP
|
||||
; CHECK-NEXT: blah %f2
|
||||
; CHECK-NEXT: #NO_APP
|
||||
; CHECK-NEXT: jl .L{{.*}}
|
||||
; CHECK: br %r14
|
||||
entry:
|
||||
call void asm sideeffect "blah $0", "{f2}"(double %val)
|
||||
%cmp = fcmp olt double %val, 0.0
|
||||
br i1 %cmp, label %exit, label %store
|
||||
|
||||
store:
|
||||
store double %val, double *%dest
|
||||
br label %exit
|
||||
|
||||
exit:
|
||||
ret double %val
|
||||
}
|
Loading…
Reference in New Issue
Block a user