1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 11:13:28 +01:00

[SystemZ] Add conditional store patterns

Add pseudo conditional store instructions, so that we use:

    branch foo:
    store
foo:

instead of:

    load
    branch foo:
    move
foo:
    store

z196 has real 32-bit and 64-bit conditional stores, but we don't use
any z196 instructions yet.

llvm-svn: 185065
This commit is contained in:
Richard Sandiford 2013-06-27 09:27:40 +00:00
parent 0006f07a9e
commit 964ffa104f
13 changed files with 1910 additions and 9 deletions

View File

@ -1696,6 +1696,59 @@ SystemZTargetLowering::emitSelect(MachineInstr *MI,
return JoinMBB;
}
// Expand the pseudo CondStore* instruction MI on behalf of
// EmitInstrWithCustomInserter, turning it into a conditional branch around
// a real store.  StoreOpcode names the store to emit.  When Invert is set
// the store is performed if the condition is false instead of true.
// Returns the block that execution continues in after the store.
MachineBasicBlock *
SystemZTargetLowering::emitCondStore(MachineInstr *MI,
                                     MachineBasicBlock *MBB,
                                     unsigned StoreOpcode, bool Invert) const {
  const SystemZInstrInfo *InstrInfo = TM.getInstrInfo();
  DebugLoc Loc = MI->getDebugLoc();

  // Operands of the pseudo: base, displacement, index, value to store and
  // the CC mask under test.
  MachineOperand BaseOp = MI->getOperand(0);
  int64_t Offset = MI->getOperand(1).getImm();
  unsigned Index = MI->getOperand(2).getReg();
  unsigned Value = MI->getOperand(3).getReg();
  unsigned BranchMask = MI->getOperand(4).getImm();

  // Let the instruction info choose the opcode to use for this displacement.
  StoreOpcode = InstrInfo->getOpcodeForOffset(StoreOpcode, Offset);

  // The branch jumps *around* the store, so in the non-inverted case it
  // must be taken for exactly the opposite set of CC values.
  if (!Invert)
    BranchMask ^= SystemZ::CCMASK_ANY;

  MachineBasicBlock *HeadMBB = MBB;
  MachineBasicBlock *TailMBB = splitBlockAfter(MI, MBB);
  MachineBasicBlock *StoreMBB = emitBlockAfter(HeadMBB);

  //  HeadMBB:
  //   BRC BranchMask, TailMBB
  //   # fallthrough to StoreMBB
  //
  // Comparisons are glued to their uses in the original DAG.  That keeps
  // CC-clobbering instructions from being scheduled in between and stops
  // comparison results from being reused, so this CondStore is the only
  // user of any comparison that precedes it.  We can therefore try to fold
  // that comparison into a fused compare and branch.
  if (!convertPrevCompareToBranch(HeadMBB, MI, BranchMask, TailMBB))
    BuildMI(HeadMBB, Loc, InstrInfo->get(SystemZ::BRC))
      .addImm(BranchMask).addMBB(TailMBB);
  HeadMBB->addSuccessor(TailMBB);
  HeadMBB->addSuccessor(StoreMBB);

  //  StoreMBB:
  //   store %Value, %Offset(%Index,%Base)
  //   # fallthrough to TailMBB
  BuildMI(StoreMBB, Loc, InstrInfo->get(StoreOpcode))
    .addReg(Value).addOperand(BaseOp).addImm(Offset).addReg(Index);
  StoreMBB->addSuccessor(TailMBB);

  // The pseudo has been fully replaced.
  MI->eraseFromParent();
  return TailMBB;
}
// Implement EmitInstrWithCustomInserter for pseudo ATOMIC_LOAD{,W}_*
// or ATOMIC_SWAP{,W} instruction MI. BinOpcode is the instruction that
// performs the binary operation elided by "*", or 0 for ATOMIC_SWAP{,W}.
@ -2100,6 +2153,43 @@ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const {
case SystemZ::SelectF128:
return emitSelect(MI, MBB);
case SystemZ::CondStore8_32:
return emitCondStore(MI, MBB, SystemZ::STC32, false);
case SystemZ::CondStore8_32Inv:
return emitCondStore(MI, MBB, SystemZ::STC32, true);
case SystemZ::CondStore16_32:
return emitCondStore(MI, MBB, SystemZ::STH32, false);
case SystemZ::CondStore16_32Inv:
return emitCondStore(MI, MBB, SystemZ::STH32, true);
case SystemZ::CondStore32_32:
return emitCondStore(MI, MBB, SystemZ::ST32, false);
case SystemZ::CondStore32_32Inv:
return emitCondStore(MI, MBB, SystemZ::ST32, true);
case SystemZ::CondStore8:
return emitCondStore(MI, MBB, SystemZ::STC, false);
case SystemZ::CondStore8Inv:
return emitCondStore(MI, MBB, SystemZ::STC, true);
case SystemZ::CondStore16:
return emitCondStore(MI, MBB, SystemZ::STH, false);
case SystemZ::CondStore16Inv:
return emitCondStore(MI, MBB, SystemZ::STH, true);
case SystemZ::CondStore32:
return emitCondStore(MI, MBB, SystemZ::ST, false);
case SystemZ::CondStore32Inv:
return emitCondStore(MI, MBB, SystemZ::ST, true);
case SystemZ::CondStore64:
return emitCondStore(MI, MBB, SystemZ::STG, false);
case SystemZ::CondStore64Inv:
return emitCondStore(MI, MBB, SystemZ::STG, true);
case SystemZ::CondStoreF32:
return emitCondStore(MI, MBB, SystemZ::STE, false);
case SystemZ::CondStoreF32Inv:
return emitCondStore(MI, MBB, SystemZ::STE, true);
case SystemZ::CondStoreF64:
return emitCondStore(MI, MBB, SystemZ::STD, false);
case SystemZ::CondStoreF64Inv:
return emitCondStore(MI, MBB, SystemZ::STD, true);
case SystemZ::AEXT128_64:
return emitExt128(MI, MBB, false, SystemZ::subreg_low);
case SystemZ::ZEXT128_32:

View File

@ -203,6 +203,9 @@ private:
// Implement EmitInstrWithCustomInserter for individual operation types.
MachineBasicBlock *emitSelect(MachineInstr *MI,
MachineBasicBlock *BB) const;
// Expand a pseudo CondStore* instruction MI.  StoreOpcode is the store to
// use and Invert says whether the store should happen when the condition
// is false rather than true.  Returns the block in which lowering continues.
MachineBasicBlock *emitCondStore(MachineInstr *MI,
MachineBasicBlock *BB,
unsigned StoreOpcode, bool Invert) const;
MachineBasicBlock *emitExt128(MachineInstr *MI,
MachineBasicBlock *MBB,
bool ClearEven, unsigned SubReg) const;

View File

@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// Control-flow instructions
// Select instructions
//===----------------------------------------------------------------------===//
// C's ?: operator for floating-point operands.
@ -16,6 +16,11 @@ def SelectF32 : SelectWrapper<FP32>;
def SelectF64 : SelectWrapper<FP64>;
def SelectF128 : SelectWrapper<FP128>;
// Conditional stores of floating-point registers.  Each defm creates the
// normal and "Inv" pseudos for a non-volatile store of a select between an
// FP32/FP64 register and a non-volatile load from the same address.
defm CondStoreF32 : CondStores<FP32, nonvolatile_store,
nonvolatile_load, bdxaddr20only>;
defm CondStoreF64 : CondStores<FP64, nonvolatile_store,
nonvolatile_load, bdxaddr20only>;
//===----------------------------------------------------------------------===//
// Move instructions
//===----------------------------------------------------------------------===//

View File

@ -956,6 +956,19 @@ class SelectWrapper<RegisterOperand cls>
let Uses = [CC];
}
// Stores $new to $addr if $cc is true ("" case) or false (Inv case).
//
// CLS is the register class of the value being stored, STORE and LOAD are
// the store and load operators to match, and MODE is the addressing mode
// used for $addr.  Each pseudo matches a STORE back to $addr of a select
// between $new and a LOAD from that same $addr, so the only value that ever
// needs to be written is $new.  The pseudos read CC, are also marked as
// defining it, and are expanded by a custom inserter rather than being
// emitted directly.
multiclass CondStores<RegisterOperand cls, SDPatternOperator store,
SDPatternOperator load, AddressingMode mode> {
let Defs = [CC], Uses = [CC], usesCustomInserter = 1 in {
// $new is the first select operand: it is the value stored when $cc holds.
def "" : Pseudo<(outs), (ins mode:$addr, cls:$new, i8imm:$cc),
[(store (z_select_ccmask cls:$new, (load mode:$addr),
imm:$cc), mode:$addr)]>;
// $new is the second select operand: it is the value stored when $cc does
// not hold.
def Inv : Pseudo<(outs), (ins mode:$addr, cls:$new, i8imm:$cc),
[(store (z_select_ccmask (load mode:$addr), cls:$new,
imm:$cc), mode:$addr)]>;
}
}
// OPERATOR is ATOMIC_SWAP or an ATOMIC_LOAD_* operation. PAT and OPERAND
// describe the second (non-memory) operand.
class AtomicLoadBinary<SDPatternOperator operator, RegisterOperand cls,

View File

@ -163,9 +163,29 @@ defm AsmJE : IntCondExtendedMnemonic<8, "e", "nlh">;
defm AsmJHE : IntCondExtendedMnemonic<10, "he", "nl">;
defm AsmJLE : IntCondExtendedMnemonic<12, "le", "nh">;
//===----------------------------------------------------------------------===//
// Select instructions
//===----------------------------------------------------------------------===//
def Select32 : SelectWrapper<GR32>;
def Select64 : SelectWrapper<GR64>;
// Conditional stores of general-purpose registers.  The number in each name
// is the width of the memory access.  The "_32" forms store from a GR32 and
// the unsuffixed forms store from a GR64.  Forms that narrow the register
// pair a truncating store with an extending load of the same width, where
// the kind of extension does not matter.  All forms require both the load
// and the store to be non-volatile.
defm CondStore8_32 : CondStores<GR32, nonvolatile_truncstorei8,
nonvolatile_anyextloadi8, bdxaddr20only>;
defm CondStore16_32 : CondStores<GR32, nonvolatile_truncstorei16,
nonvolatile_anyextloadi16, bdxaddr20only>;
defm CondStore32_32 : CondStores<GR32, nonvolatile_store,
nonvolatile_load, bdxaddr20only>;
defm CondStore8 : CondStores<GR64, nonvolatile_truncstorei8,
nonvolatile_anyextloadi8, bdxaddr20only>;
defm CondStore16 : CondStores<GR64, nonvolatile_truncstorei16,
nonvolatile_anyextloadi16, bdxaddr20only>;
defm CondStore32 : CondStores<GR64, nonvolatile_truncstorei32,
nonvolatile_anyextloadi32, bdxaddr20only>;
defm CondStore64 : CondStores<GR64, nonvolatile_store,
nonvolatile_load, bdxaddr20only>;
//===----------------------------------------------------------------------===//
// Call instructions
//===----------------------------------------------------------------------===//

View File

@ -120,6 +120,20 @@ def zext32 : PatFrag<(ops node:$src), (zext (i32 node:$src))>;
def loadf32 : PatFrag<(ops node:$src), (f32 (load node:$src))>;
def loadf64 : PatFrag<(ops node:$src), (f64 (load node:$src))>;
// Extending loads in which the extension type doesn't matter.
//
// anyextload matches an unindexed load whose extension type is anything
// other than NON_EXTLOAD.  The sized variants below additionally require
// the in-memory type of the load to be i8, i16 or i32 respectively.
def anyextload : PatFrag<(ops node:$ptr), (unindexedload node:$ptr), [{
return cast<LoadSDNode>(N)->getExtensionType() != ISD::NON_EXTLOAD;
}]>;
// Any extending load from an 8-bit memory location.
def anyextloadi8 : PatFrag<(ops node:$ptr), (anyextload node:$ptr), [{
return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8;
}]>;
// Any extending load from a 16-bit memory location.
def anyextloadi16 : PatFrag<(ops node:$ptr), (anyextload node:$ptr), [{
return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i16;
}]>;
// Any extending load from a 32-bit memory location.
def anyextloadi32 : PatFrag<(ops node:$ptr), (anyextload node:$ptr), [{
return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32;
}]>;
// Aligned loads.
class AlignedLoad<SDPatternOperator load>
: PatFrag<(ops node:$addr), (load node:$addr), [{
@ -149,7 +163,10 @@ class NonvolatileLoad<SDPatternOperator load>
LoadSDNode *Load = cast<LoadSDNode>(N);
return !Load->isVolatile();
}]>;
def nonvolatile_load : NonvolatileLoad<load>;
def nonvolatile_load : NonvolatileLoad<load>;
def nonvolatile_anyextloadi8 : NonvolatileLoad<anyextloadi8>;
def nonvolatile_anyextloadi16 : NonvolatileLoad<anyextloadi16>;
def nonvolatile_anyextloadi32 : NonvolatileLoad<anyextloadi32>;
// Non-volatile stores.
class NonvolatileStore<SDPatternOperator store>
@ -157,7 +174,10 @@ class NonvolatileStore<SDPatternOperator store>
StoreSDNode *Store = cast<StoreSDNode>(N);
return !Store->isVolatile();
}]>;
def nonvolatile_store : NonvolatileStore<store>;
def nonvolatile_store : NonvolatileStore<store>;
def nonvolatile_truncstorei8 : NonvolatileStore<truncstorei8>;
def nonvolatile_truncstorei16 : NonvolatileStore<truncstorei16>;
def nonvolatile_truncstorei32 : NonvolatileStore<truncstorei32>;
// Insertions.
def inserti8 : PatFrag<(ops node:$src1, node:$src2),

View File

@ -50,12 +50,8 @@ class RMWI<SDPatternOperator load, SDPatternOperator operator,
// memory location. IMM is the type of the second operand.
multiclass RMWIByte<SDPatternOperator operator, AddressingMode mode,
Instruction insn> {
def : RMWI<zextloadi8, operator, truncstorei8, mode, imm32, insn>;
def : RMWI<zextloadi8, operator, truncstorei8, mode, imm64, insn>;
def : RMWI<sextloadi8, operator, truncstorei8, mode, imm32, insn>;
def : RMWI<sextloadi8, operator, truncstorei8, mode, imm64, insn>;
def : RMWI<extloadi8, operator, truncstorei8, mode, imm32, insn>;
def : RMWI<extloadi8, operator, truncstorei8, mode, imm64, insn>;
def : RMWI<anyextloadi8, operator, truncstorei8, mode, imm32, insn>;
def : RMWI<anyextloadi8, operator, truncstorei8, mode, imm64, insn>;
}
// Record that INSN performs insertion TYPE into a register of class CLS.

View File

@ -0,0 +1,396 @@
; Test 8-bit conditional stores that are presented as selects.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
declare void @foo(i8 *)
; Test the simple case, with the loaded value first.
define void @f1(i8 *%ptr, i8 %alt, i32 %limit) {
; CHECK: f1:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: stc %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
%cond = icmp ult i32 %limit, 42
%orig = load i8 *%ptr
%res = select i1 %cond, i8 %orig, i8 %alt
store i8 %res, i8 *%ptr
ret void
}
; ...and with the loaded value second
define void @f2(i8 *%ptr, i8 %alt, i32 %limit) {
; CHECK: f2:
; CHECK-NOT: %r2
; CHECK: jnl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: stc %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
%cond = icmp ult i32 %limit, 42
%orig = load i8 *%ptr
%res = select i1 %cond, i8 %alt, i8 %orig
store i8 %res, i8 *%ptr
ret void
}
; Test cases where the value is explicitly sign-extended to 32 bits, with the
; loaded value first.
define void @f3(i8 *%ptr, i32 %alt, i32 %limit) {
; CHECK: f3:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: stc %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
%cond = icmp ult i32 %limit, 42
%orig = load i8 *%ptr
%ext = sext i8 %orig to i32
%res = select i1 %cond, i32 %ext, i32 %alt
%trunc = trunc i32 %res to i8
store i8 %trunc, i8 *%ptr
ret void
}
; ...and with the loaded value second
define void @f4(i8 *%ptr, i32 %alt, i32 %limit) {
; CHECK: f4:
; CHECK-NOT: %r2
; CHECK: jnl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: stc %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
%cond = icmp ult i32 %limit, 42
%orig = load i8 *%ptr
%ext = sext i8 %orig to i32
%res = select i1 %cond, i32 %alt, i32 %ext
%trunc = trunc i32 %res to i8
store i8 %trunc, i8 *%ptr
ret void
}
; Test cases where the value is explicitly zero-extended to 32 bits, with the
; loaded value first.
define void @f5(i8 *%ptr, i32 %alt, i32 %limit) {
; CHECK: f5:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: stc %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
%cond = icmp ult i32 %limit, 42
%orig = load i8 *%ptr
%ext = zext i8 %orig to i32
%res = select i1 %cond, i32 %ext, i32 %alt
%trunc = trunc i32 %res to i8
store i8 %trunc, i8 *%ptr
ret void
}
; ...and with the loaded value second
define void @f6(i8 *%ptr, i32 %alt, i32 %limit) {
; CHECK: f6:
; CHECK-NOT: %r2
; CHECK: jnl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: stc %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
%cond = icmp ult i32 %limit, 42
%orig = load i8 *%ptr
%ext = zext i8 %orig to i32
%res = select i1 %cond, i32 %alt, i32 %ext
%trunc = trunc i32 %res to i8
store i8 %trunc, i8 *%ptr
ret void
}
; Test cases where the value is explicitly sign-extended to 64 bits, with the
; loaded value first.
define void @f7(i8 *%ptr, i64 %alt, i32 %limit) {
; CHECK: f7:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: stc %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
%cond = icmp ult i32 %limit, 42
%orig = load i8 *%ptr
%ext = sext i8 %orig to i64
%res = select i1 %cond, i64 %ext, i64 %alt
%trunc = trunc i64 %res to i8
store i8 %trunc, i8 *%ptr
ret void
}
; ...and with the loaded value second
define void @f8(i8 *%ptr, i64 %alt, i32 %limit) {
; CHECK: f8:
; CHECK-NOT: %r2
; CHECK: jnl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: stc %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
%cond = icmp ult i32 %limit, 42
%orig = load i8 *%ptr
%ext = sext i8 %orig to i64
%res = select i1 %cond, i64 %alt, i64 %ext
%trunc = trunc i64 %res to i8
store i8 %trunc, i8 *%ptr
ret void
}
; Test cases where the value is explicitly zero-extended to 64 bits, with the
; loaded value first.
define void @f9(i8 *%ptr, i64 %alt, i32 %limit) {
; CHECK: f9:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: stc %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
%cond = icmp ult i32 %limit, 42
%orig = load i8 *%ptr
%ext = zext i8 %orig to i64
%res = select i1 %cond, i64 %ext, i64 %alt
%trunc = trunc i64 %res to i8
store i8 %trunc, i8 *%ptr
ret void
}
; ...and with the loaded value second
define void @f10(i8 *%ptr, i64 %alt, i32 %limit) {
; CHECK: f10:
; CHECK-NOT: %r2
; CHECK: jnl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: stc %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
%cond = icmp ult i32 %limit, 42
%orig = load i8 *%ptr
%ext = zext i8 %orig to i64
%res = select i1 %cond, i64 %alt, i64 %ext
%trunc = trunc i64 %res to i8
store i8 %trunc, i8 *%ptr
ret void
}
; Check the high end of the STC range.
define void @f11(i8 *%base, i8 %alt, i32 %limit) {
; CHECK: f11:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: stc %r3, 4095(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
%ptr = getelementptr i8 *%base, i64 4095
%cond = icmp ult i32 %limit, 42
%orig = load i8 *%ptr
%res = select i1 %cond, i8 %orig, i8 %alt
store i8 %res, i8 *%ptr
ret void
}
; Check the next byte up, which should use STCY instead of STC.
define void @f12(i8 *%base, i8 %alt, i32 %limit) {
; CHECK: f12:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: stcy %r3, 4096(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
%ptr = getelementptr i8 *%base, i64 4096
%cond = icmp ult i32 %limit, 42
%orig = load i8 *%ptr
%res = select i1 %cond, i8 %orig, i8 %alt
store i8 %res, i8 *%ptr
ret void
}
; Check the high end of the STCY range.
define void @f13(i8 *%base, i8 %alt, i32 %limit) {
; CHECK: f13:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: stcy %r3, 524287(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
%ptr = getelementptr i8 *%base, i64 524287
%cond = icmp ult i32 %limit, 42
%orig = load i8 *%ptr
%res = select i1 %cond, i8 %orig, i8 %alt
store i8 %res, i8 *%ptr
ret void
}
; Check the next byte up, which needs separate address logic.
; Other sequences besides this one would be OK.
define void @f14(i8 *%base, i8 %alt, i32 %limit) {
; CHECK: f14:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: agfi %r2, 524288
; CHECK: stc %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
%ptr = getelementptr i8 *%base, i64 524288
%cond = icmp ult i32 %limit, 42
%orig = load i8 *%ptr
%res = select i1 %cond, i8 %orig, i8 %alt
store i8 %res, i8 *%ptr
ret void
}
; Check the low end of the STCY range.
define void @f15(i8 *%base, i8 %alt, i32 %limit) {
; CHECK: f15:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: stcy %r3, -524288(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
%ptr = getelementptr i8 *%base, i64 -524288
%cond = icmp ult i32 %limit, 42
%orig = load i8 *%ptr
%res = select i1 %cond, i8 %orig, i8 %alt
store i8 %res, i8 *%ptr
ret void
}
; Check the next byte down, which needs separate address logic.
; Other sequences besides this one would be OK.
define void @f16(i8 *%base, i8 %alt, i32 %limit) {
; CHECK: f16:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: agfi %r2, -524289
; CHECK: stc %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
%ptr = getelementptr i8 *%base, i64 -524289
%cond = icmp ult i32 %limit, 42
%orig = load i8 *%ptr
%res = select i1 %cond, i8 %orig, i8 %alt
store i8 %res, i8 *%ptr
ret void
}
; Check that STCY allows an index.
define void @f17(i64 %base, i64 %index, i8 %alt, i32 %limit) {
; CHECK: f17:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: stcy %r4, 4096(%r3,%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
%add1 = add i64 %base, %index
%add2 = add i64 %add1, 4096
%ptr = inttoptr i64 %add2 to i8 *
%cond = icmp ult i32 %limit, 42
%orig = load i8 *%ptr
%res = select i1 %cond, i8 %orig, i8 %alt
store i8 %res, i8 *%ptr
ret void
}
; Check that volatile loads are not matched.
define void @f18(i8 *%ptr, i8 %alt, i32 %limit) {
; CHECK: f18:
; CHECK: lb {{%r[0-5]}}, 0(%r2)
; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]]
; CHECK: [[LABEL]]:
; CHECK: stc {{%r[0-5]}}, 0(%r2)
; CHECK: br %r14
%cond = icmp ult i32 %limit, 42
%orig = load volatile i8 *%ptr
%res = select i1 %cond, i8 %orig, i8 %alt
store i8 %res, i8 *%ptr
ret void
}
; ...likewise stores. In this case we should have a conditional load into %r3.
define void @f19(i8 *%ptr, i8 %alt, i32 %limit) {
; CHECK: f19:
; CHECK: jnl [[LABEL:[^ ]*]]
; CHECK: lb %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: stc %r3, 0(%r2)
; CHECK: br %r14
%cond = icmp ult i32 %limit, 42
%orig = load i8 *%ptr
%res = select i1 %cond, i8 %orig, i8 %alt
store volatile i8 %res, i8 *%ptr
ret void
}
; Check that atomic loads are not matched. The transformation is OK for
; the "unordered" case tested here, but since we don't try to handle atomic
; operations at all in this context, it seems better to assert that than
; to restrict the test to a stronger ordering.
define void @f20(i8 *%ptr, i8 %alt, i32 %limit) {
; FIXME: should use a normal load instead of CS.
; CHECK: f20:
; CHECK: cs {{%r[0-9]+}},
; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]]
; CHECK: [[LABEL]]:
; CHECK: stc {{%r[0-9]+}},
; CHECK: br %r14
%cond = icmp ult i32 %limit, 42
%orig = load atomic i8 *%ptr unordered, align 1
%res = select i1 %cond, i8 %orig, i8 %alt
store i8 %res, i8 *%ptr
ret void
}
; ...likewise stores.
define void @f21(i8 *%ptr, i8 %alt, i32 %limit) {
; FIXME: should use a normal store instead of CS.
; CHECK: f21:
; CHECK: jnl [[LABEL:[^ ]*]]
; CHECK: lb %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: cs {{%r[0-9]+}},
; CHECK: br %r14
%cond = icmp ult i32 %limit, 42
%orig = load i8 *%ptr
%res = select i1 %cond, i8 %orig, i8 %alt
store atomic i8 %res, i8 *%ptr unordered, align 1
ret void
}
; Try a frame index base.
define void @f22(i8 %alt, i32 %limit) {
; CHECK: f22:
; CHECK: brasl %r14, foo@PLT
; CHECK-NOT: %r15
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r15
; CHECK: stc {{%r[0-9]+}}, {{[0-9]+}}(%r15)
; CHECK: [[LABEL]]:
; CHECK: brasl %r14, foo@PLT
; CHECK: br %r14
%ptr = alloca i8
call void @foo(i8 *%ptr)
%cond = icmp ult i32 %limit, 42
%orig = load i8 *%ptr
%res = select i1 %cond, i8 %orig, i8 %alt
store i8 %res, i8 *%ptr
call void @foo(i8 *%ptr)
ret void
}

View File

@ -0,0 +1,396 @@
; Test 16-bit conditional stores that are presented as selects.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
declare void @foo(i16 *)
; Test the simple case, with the loaded value first.
define void @f1(i16 *%ptr, i16 %alt, i32 %limit) {
; CHECK: f1:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: sth %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
%cond = icmp ult i32 %limit, 42
%orig = load i16 *%ptr
%res = select i1 %cond, i16 %orig, i16 %alt
store i16 %res, i16 *%ptr
ret void
}
; ...and with the loaded value second
define void @f2(i16 *%ptr, i16 %alt, i32 %limit) {
; CHECK: f2:
; CHECK-NOT: %r2
; CHECK: jnl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: sth %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
%cond = icmp ult i32 %limit, 42
%orig = load i16 *%ptr
%res = select i1 %cond, i16 %alt, i16 %orig
store i16 %res, i16 *%ptr
ret void
}
; Test cases where the value is explicitly sign-extended to 32 bits, with the
; loaded value first.
define void @f3(i16 *%ptr, i32 %alt, i32 %limit) {
; CHECK: f3:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: sth %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
%cond = icmp ult i32 %limit, 42
%orig = load i16 *%ptr
%ext = sext i16 %orig to i32
%res = select i1 %cond, i32 %ext, i32 %alt
%trunc = trunc i32 %res to i16
store i16 %trunc, i16 *%ptr
ret void
}
; ...and with the loaded value second
define void @f4(i16 *%ptr, i32 %alt, i32 %limit) {
; CHECK: f4:
; CHECK-NOT: %r2
; CHECK: jnl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: sth %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
%cond = icmp ult i32 %limit, 42
%orig = load i16 *%ptr
%ext = sext i16 %orig to i32
%res = select i1 %cond, i32 %alt, i32 %ext
%trunc = trunc i32 %res to i16
store i16 %trunc, i16 *%ptr
ret void
}
; Test cases where the value is explicitly zero-extended to 32 bits, with the
; loaded value first.
define void @f5(i16 *%ptr, i32 %alt, i32 %limit) {
; CHECK: f5:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: sth %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
%cond = icmp ult i32 %limit, 42
%orig = load i16 *%ptr
%ext = zext i16 %orig to i32
%res = select i1 %cond, i32 %ext, i32 %alt
%trunc = trunc i32 %res to i16
store i16 %trunc, i16 *%ptr
ret void
}
; ...and with the loaded value second
define void @f6(i16 *%ptr, i32 %alt, i32 %limit) {
; CHECK: f6:
; CHECK-NOT: %r2
; CHECK: jnl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: sth %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
%cond = icmp ult i32 %limit, 42
%orig = load i16 *%ptr
%ext = zext i16 %orig to i32
%res = select i1 %cond, i32 %alt, i32 %ext
%trunc = trunc i32 %res to i16
store i16 %trunc, i16 *%ptr
ret void
}
; Test cases where the value is explicitly sign-extended to 64 bits, with the
; loaded value first.
define void @f7(i16 *%ptr, i64 %alt, i32 %limit) {
; CHECK: f7:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: sth %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
%cond = icmp ult i32 %limit, 42
%orig = load i16 *%ptr
%ext = sext i16 %orig to i64
%res = select i1 %cond, i64 %ext, i64 %alt
%trunc = trunc i64 %res to i16
store i16 %trunc, i16 *%ptr
ret void
}
; ...and with the loaded value second
define void @f8(i16 *%ptr, i64 %alt, i32 %limit) {
; CHECK: f8:
; CHECK-NOT: %r2
; CHECK: jnl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: sth %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
%cond = icmp ult i32 %limit, 42
%orig = load i16 *%ptr
%ext = sext i16 %orig to i64
%res = select i1 %cond, i64 %alt, i64 %ext
%trunc = trunc i64 %res to i16
store i16 %trunc, i16 *%ptr
ret void
}
; Test cases where the value is explicitly zero-extended to 64 bits, with the
; loaded value first.
define void @f9(i16 *%ptr, i64 %alt, i32 %limit) {
; CHECK: f9:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: sth %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
%cond = icmp ult i32 %limit, 42
%orig = load i16 *%ptr
%ext = zext i16 %orig to i64
%res = select i1 %cond, i64 %ext, i64 %alt
%trunc = trunc i64 %res to i16
store i16 %trunc, i16 *%ptr
ret void
}
; ...and with the loaded value second
define void @f10(i16 *%ptr, i64 %alt, i32 %limit) {
; CHECK: f10:
; CHECK-NOT: %r2
; CHECK: jnl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: sth %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
%cond = icmp ult i32 %limit, 42
%orig = load i16 *%ptr
%ext = zext i16 %orig to i64
%res = select i1 %cond, i64 %alt, i64 %ext
%trunc = trunc i64 %res to i16
store i16 %trunc, i16 *%ptr
ret void
}
; Check the high end of the aligned STH range.
define void @f11(i16 *%base, i16 %alt, i32 %limit) {
; CHECK: f11:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: sth %r3, 4094(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
%ptr = getelementptr i16 *%base, i64 2047
%cond = icmp ult i32 %limit, 42
%orig = load i16 *%ptr
%res = select i1 %cond, i16 %orig, i16 %alt
store i16 %res, i16 *%ptr
ret void
}
; Check the next halfword up, which should use STHY instead of STH.
define void @f12(i16 *%base, i16 %alt, i32 %limit) {
; CHECK: f12:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: sthy %r3, 4096(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
%ptr = getelementptr i16 *%base, i64 2048
%cond = icmp ult i32 %limit, 42
%orig = load i16 *%ptr
%res = select i1 %cond, i16 %orig, i16 %alt
store i16 %res, i16 *%ptr
ret void
}
; Check the high end of the aligned STHY range.
define void @f13(i16 *%base, i16 %alt, i32 %limit) {
; CHECK: f13:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: sthy %r3, 524286(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
%ptr = getelementptr i16 *%base, i64 262143
%cond = icmp ult i32 %limit, 42
%orig = load i16 *%ptr
%res = select i1 %cond, i16 %orig, i16 %alt
store i16 %res, i16 *%ptr
ret void
}
; Check the next halfword up, which needs separate address logic.
; Other sequences besides this one would be OK.
define void @f14(i16 *%base, i16 %alt, i32 %limit) {
; CHECK: f14:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: agfi %r2, 524288
; CHECK: sth %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
%ptr = getelementptr i16 *%base, i64 262144
%cond = icmp ult i32 %limit, 42
%orig = load i16 *%ptr
%res = select i1 %cond, i16 %orig, i16 %alt
store i16 %res, i16 *%ptr
ret void
}
; Check the low end of the STHY range.
define void @f15(i16 *%base, i16 %alt, i32 %limit) {
; CHECK: f15:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: sthy %r3, -524288(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
%ptr = getelementptr i16 *%base, i64 -262144
%cond = icmp ult i32 %limit, 42
%orig = load i16 *%ptr
%res = select i1 %cond, i16 %orig, i16 %alt
store i16 %res, i16 *%ptr
ret void
}
; Check the next halfword down, which needs separate address logic.
; Other sequences besides this one would be OK.
define void @f16(i16 *%base, i16 %alt, i32 %limit) {
; CHECK: f16:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: agfi %r2, -524290
; CHECK: sth %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
%ptr = getelementptr i16 *%base, i64 -262145
%cond = icmp ult i32 %limit, 42
%orig = load i16 *%ptr
%res = select i1 %cond, i16 %orig, i16 %alt
store i16 %res, i16 *%ptr
ret void
}
; Check that STHY allows an index.
define void @f17(i64 %base, i64 %index, i16 %alt, i32 %limit) {
; CHECK: f17:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: sthy %r4, 4096(%r3,%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
%add1 = add i64 %base, %index
%add2 = add i64 %add1, 4096
%ptr = inttoptr i64 %add2 to i16 *
%cond = icmp ult i32 %limit, 42
%orig = load i16 *%ptr
%res = select i1 %cond, i16 %orig, i16 %alt
store i16 %res, i16 *%ptr
ret void
}
; Check that volatile loads are not matched.
define void @f18(i16 *%ptr, i16 %alt, i32 %limit) {
; CHECK: f18:
; CHECK: lh {{%r[0-5]}}, 0(%r2)
; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]]
; CHECK: [[LABEL]]:
; CHECK: sth {{%r[0-5]}}, 0(%r2)
; CHECK: br %r14
%cond = icmp ult i32 %limit, 42
%orig = load volatile i16 *%ptr
%res = select i1 %cond, i16 %orig, i16 %alt
store i16 %res, i16 *%ptr
ret void
}
; ...likewise stores. In this case we should have a conditional load into %r3.
define void @f19(i16 *%ptr, i16 %alt, i32 %limit) {
; CHECK: f19:
; CHECK: jnl [[LABEL:[^ ]*]]
; CHECK: lh %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: sth %r3, 0(%r2)
; CHECK: br %r14
%cond = icmp ult i32 %limit, 42
%orig = load i16 *%ptr
%res = select i1 %cond, i16 %orig, i16 %alt
store volatile i16 %res, i16 *%ptr
ret void
}
; Check that atomic loads are not matched. The transformation is OK for
; the "unordered" case tested here, but since we don't try to handle atomic
; operations at all in this context, it seems better to assert that than
; to restrict the test to a stronger ordering.
define void @f20(i16 *%ptr, i16 %alt, i32 %limit) {
; FIXME: should use a normal load instead of CS.
; CHECK: f20:
; CHECK: cs {{%r[0-9]+}},
; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]]
; CHECK: [[LABEL]]:
; CHECK: sth {{%r[0-9]+}},
; CHECK: br %r14
%cond = icmp ult i32 %limit, 42
%orig = load atomic i16 *%ptr unordered, align 2
%res = select i1 %cond, i16 %orig, i16 %alt
store i16 %res, i16 *%ptr
ret void
}
; ...likewise stores.
define void @f21(i16 *%ptr, i16 %alt, i32 %limit) {
; FIXME: should use a normal store instead of CS.
; CHECK: f21:
; CHECK: jnl [[LABEL:[^ ]*]]
; CHECK: lh %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: cs {{%r[0-9]+}},
; CHECK: br %r14
%cond = icmp ult i32 %limit, 42
%orig = load i16 *%ptr
%res = select i1 %cond, i16 %orig, i16 %alt
store atomic i16 %res, i16 *%ptr unordered, align 2
ret void
}
; Try a frame index base.
define void @f22(i16 %alt, i32 %limit) {
; CHECK: f22:
; CHECK: brasl %r14, foo@PLT
; CHECK-NOT: %r15
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r15
; CHECK: sth {{%r[0-9]+}}, {{[0-9]+}}(%r15)
; CHECK: [[LABEL]]:
; CHECK: brasl %r14, foo@PLT
; CHECK: br %r14
%ptr = alloca i16
call void @foo(i16 *%ptr)
%cond = icmp ult i32 %limit, 42
%orig = load i16 *%ptr
%res = select i1 %cond, i16 %orig, i16 %alt
store i16 %res, i16 *%ptr
call void @foo(i16 *%ptr)
ret void
}

View File

@ -0,0 +1,322 @@
; Test 32-bit conditional stores that are presented as selects.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
declare void @foo(i32 *)
; Test the simple case, with the loaded value first.
; The select keeps the loaded value when %limit < 42 (unsigned), so the store
; only matters when the comparison is false. The expected output is a JL that
; branches around a plain ST, with no use of %r2 before the store.
define void @f1(i32 *%ptr, i32 %alt, i32 %limit) {
; CHECK: f1:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: st %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
%cond = icmp ult i32 %limit, 42
%orig = load i32 *%ptr
%res = select i1 %cond, i32 %orig, i32 %alt
store i32 %res, i32 *%ptr
ret void
}
; ...and with the loaded value second. Here the select keeps the loaded value
; when the comparison is false, so the store only matters when %limit < 42 and
; the expected branch around the ST is the inverted one (JNL).
define void @f2(i32 *%ptr, i32 %alt, i32 %limit) {
; CHECK: f2:
; CHECK-NOT: %r2
; CHECK: jnl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: st %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
%cond = icmp ult i32 %limit, 42
%orig = load i32 *%ptr
%res = select i1 %cond, i32 %alt, i32 %orig
store i32 %res, i32 *%ptr
ret void
}
; Test cases where the value is explicitly sign-extended to 64 bits, with the
; loaded value first. The select operates on i64 values and the result is
; truncated back to i32 before the store; the expected output is still a JL
; around a plain 32-bit ST of %r3.
define void @f3(i32 *%ptr, i64 %alt, i32 %limit) {
; CHECK: f3:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: st %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
%cond = icmp ult i32 %limit, 42
%orig = load i32 *%ptr
%ext = sext i32 %orig to i64
%res = select i1 %cond, i64 %ext, i64 %alt
%trunc = trunc i64 %res to i32
store i32 %trunc, i32 *%ptr
ret void
}
; ...and with the loaded value second. Same sign-extend/select/truncate shape
; as the previous test but with the select operands swapped, so the expected
; branch around the ST is JNL.
define void @f4(i32 *%ptr, i64 %alt, i32 %limit) {
; CHECK: f4:
; CHECK-NOT: %r2
; CHECK: jnl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: st %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
%cond = icmp ult i32 %limit, 42
%orig = load i32 *%ptr
%ext = sext i32 %orig to i64
%res = select i1 %cond, i64 %alt, i64 %ext
%trunc = trunc i64 %res to i32
store i32 %trunc, i32 *%ptr
ret void
}
; Test cases where the value is explicitly zero-extended to 64 bits, with the
; loaded value first. The select operates on i64 values and the result is
; truncated back to i32 before the store; the expected output is still a JL
; around a plain 32-bit ST of %r3.
define void @f5(i32 *%ptr, i64 %alt, i32 %limit) {
; CHECK: f5:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: st %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
%cond = icmp ult i32 %limit, 42
%orig = load i32 *%ptr
%ext = zext i32 %orig to i64
%res = select i1 %cond, i64 %ext, i64 %alt
%trunc = trunc i64 %res to i32
store i32 %trunc, i32 *%ptr
ret void
}
; ...and with the loaded value second. Same zero-extend/select/truncate shape
; as the previous test but with the select operands swapped, so the expected
; branch around the ST is JNL.
define void @f6(i32 *%ptr, i64 %alt, i32 %limit) {
; CHECK: f6:
; CHECK-NOT: %r2
; CHECK: jnl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: st %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
%cond = icmp ult i32 %limit, 42
%orig = load i32 *%ptr
%ext = zext i32 %orig to i64
%res = select i1 %cond, i64 %alt, i64 %ext
%trunc = trunc i64 %res to i32
store i32 %trunc, i32 *%ptr
ret void
}
; Check the high end of the aligned ST range.
; Element 1023 of an i32 array is byte offset 4092, which the expected output
; folds directly into the ST displacement.
define void @f7(i32 *%base, i32 %alt, i32 %limit) {
; CHECK: f7:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: st %r3, 4092(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
%ptr = getelementptr i32 *%base, i64 1023
%cond = icmp ult i32 %limit, 42
%orig = load i32 *%ptr
%res = select i1 %cond, i32 %orig, i32 %alt
store i32 %res, i32 *%ptr
ret void
}
; Check the next word up, which should use STY instead of ST.
; Element 1024 of an i32 array is byte offset 4096.
define void @f8(i32 *%base, i32 %alt, i32 %limit) {
; CHECK: f8:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: sty %r3, 4096(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
%ptr = getelementptr i32 *%base, i64 1024
%cond = icmp ult i32 %limit, 42
%orig = load i32 *%ptr
%res = select i1 %cond, i32 %orig, i32 %alt
store i32 %res, i32 *%ptr
ret void
}
; Check the high end of the aligned STY range.
; Element 131071 of an i32 array is byte offset 524284, which the expected
; output still folds into the STY displacement.
define void @f9(i32 *%base, i32 %alt, i32 %limit) {
; CHECK: f9:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: sty %r3, 524284(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
%ptr = getelementptr i32 *%base, i64 131071
%cond = icmp ult i32 %limit, 42
%orig = load i32 *%ptr
%res = select i1 %cond, i32 %orig, i32 %alt
store i32 %res, i32 *%ptr
ret void
}
; Check the next word up, which needs separate address logic.
; Other sequences besides this one would be OK.
; Element 131072 of an i32 array is byte offset 524288. The expected output
; adds that to the base with AGFI after the branch (so the address arithmetic
; is skipped along with the store) and then uses ST with a zero displacement.
define void @f10(i32 *%base, i32 %alt, i32 %limit) {
; CHECK: f10:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: agfi %r2, 524288
; CHECK: st %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
%ptr = getelementptr i32 *%base, i64 131072
%cond = icmp ult i32 %limit, 42
%orig = load i32 *%ptr
%res = select i1 %cond, i32 %orig, i32 %alt
store i32 %res, i32 *%ptr
ret void
}
; Check the low end of the STY range.
; Element -131072 of an i32 array is byte offset -524288, which the expected
; output folds into the STY displacement.
define void @f11(i32 *%base, i32 %alt, i32 %limit) {
; CHECK: f11:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: sty %r3, -524288(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
%ptr = getelementptr i32 *%base, i64 -131072
%cond = icmp ult i32 %limit, 42
%orig = load i32 *%ptr
%res = select i1 %cond, i32 %orig, i32 %alt
store i32 %res, i32 *%ptr
ret void
}
; Check the next word down, which needs separate address logic.
; Other sequences besides this one would be OK.
; Element -131073 of an i32 array is byte offset -524292. The expected output
; adds that to the base with AGFI after the branch and then uses ST with a
; zero displacement.
define void @f12(i32 *%base, i32 %alt, i32 %limit) {
; CHECK: f12:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: agfi %r2, -524292
; CHECK: st %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
%ptr = getelementptr i32 *%base, i64 -131073
%cond = icmp ult i32 %limit, 42
%orig = load i32 *%ptr
%res = select i1 %cond, i32 %orig, i32 %alt
store i32 %res, i32 *%ptr
ret void
}
; Check that STY allows an index.
; The address is built as %base + %index + 4096 via integer adds and inttoptr;
; the expected output folds all three parts into one STY operand.
define void @f13(i64 %base, i64 %index, i32 %alt, i32 %limit) {
; CHECK: f13:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: sty %r4, 4096(%r3,%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
%add1 = add i64 %base, %index
%add2 = add i64 %add1, 4096
%ptr = inttoptr i64 %add2 to i32 *
%cond = icmp ult i32 %limit, 42
%orig = load i32 *%ptr
%res = select i1 %cond, i32 %orig, i32 %alt
store i32 %res, i32 *%ptr
ret void
}
; Check that volatile loads are not matched.
; The volatile load must stay, so the expected output has an explicit L first,
; then a branch in either direction, and an ST placed after the branch target
; (i.e. a store that is not branched around).
define void @f14(i32 *%ptr, i32 %alt, i32 %limit) {
; CHECK: f14:
; CHECK: l {{%r[0-5]}}, 0(%r2)
; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]]
; CHECK: [[LABEL]]:
; CHECK: st {{%r[0-5]}}, 0(%r2)
; CHECK: br %r14
%cond = icmp ult i32 %limit, 42
%orig = load volatile i32 *%ptr
%res = select i1 %cond, i32 %orig, i32 %alt
store i32 %res, i32 *%ptr
ret void
}
; ...likewise stores. In this case we should have a conditional load into %r3.
; The store is volatile, so it cannot be turned into a conditional store and
; must always be emitted. The expected output instead skips the L with a JNL
; when the select picks %alt (%limit >= 42, unsigned) and then always does ST.
define void @f15(i32 *%ptr, i32 %alt, i32 %limit) {
; CHECK: f15:
; CHECK: jnl [[LABEL:[^ ]*]]
; CHECK: l %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: st %r3, 0(%r2)
; CHECK: br %r14
%cond = icmp ult i32 %limit, 42
%orig = load i32 *%ptr
%res = select i1 %cond, i32 %orig, i32 %alt
store volatile i32 %res, i32 *%ptr
ret void
}
; Check that atomic loads are not matched. The transformation is OK for
; the "unordered" case tested here, but since we don't try to handle atomic
; operations at all in this context, it seems better to assert that than
; to restrict the test to a stronger ordering.
; The expected output keeps the atomic load (currently emitted as CS, see the
; FIXME below), followed by a branch in either direction and an ST that sits
; after the branch target.
define void @f16(i32 *%ptr, i32 %alt, i32 %limit) {
; FIXME: should use a normal load instead of CS.
; CHECK: f16:
; CHECK: cs {{%r[0-5]}}, {{%r[0-5]}}, 0(%r2)
; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]]
; CHECK: [[LABEL]]:
; CHECK: st {{%r[0-5]}}, 0(%r2)
; CHECK: br %r14
%cond = icmp ult i32 %limit, 42
%orig = load atomic i32 *%ptr unordered, align 4
%res = select i1 %cond, i32 %orig, i32 %alt
store i32 %res, i32 *%ptr
ret void
}
; ...likewise stores: an atomic store must not be turned into a conditional
; store. The expected output conditionally loads the original value into %r3
; (JNL skips the L) and then performs the atomic store after the branch
; target; that store is currently emitted as CS (see the FIXME).
define void @f17(i32 *%ptr, i32 %alt, i32 %limit) {
; FIXME: should use a normal store instead of CS.
; CHECK: f17:
; CHECK: jnl [[LABEL:[^ ]*]]
; CHECK: l %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: cs {{%r[0-5]}}, %r3, 0(%r2)
; CHECK: br %r14
%cond = icmp ult i32 %limit, 42
%orig = load i32 *%ptr
%res = select i1 %cond, i32 %orig, i32 %alt
store atomic i32 %res, i32 *%ptr unordered, align 4
ret void
}
; Try a frame index base. The slot comes from an alloca and is passed to @foo
; before and after the load/select/store sequence. The expected output has no
; reference to %r15 between the first call and the JL, nor between the JL and
; the store, and the ST itself addresses the slot directly off %r15.
define void @f18(i32 %alt, i32 %limit) {
; CHECK: f18:
; CHECK: brasl %r14, foo@PLT
; CHECK-NOT: %r15
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r15
; CHECK: st {{%r[0-9]+}}, {{[0-9]+}}(%r15)
; CHECK: [[LABEL]]:
; CHECK: brasl %r14, foo@PLT
; CHECK: br %r14
%ptr = alloca i32
call void @foo(i32 *%ptr)
%cond = icmp ult i32 %limit, 42
%orig = load i32 *%ptr
%res = select i1 %cond, i32 %orig, i32 %alt
store i32 %res, i32 *%ptr
call void @foo(i32 *%ptr)
ret void
}

View File

@ -0,0 +1,214 @@
; Test 64-bit conditional stores that are presented as selects.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
declare void @foo(i64 *)
; Test with the loaded value first.
; The select keeps the loaded value when %limit < 42 (unsigned), so the store
; only matters when the comparison is false. The expected output is a JL that
; branches around an STG, with no use of %r2 before the store.
define void @f1(i64 *%ptr, i64 %alt, i32 %limit) {
; CHECK: f1:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: stg %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
%cond = icmp ult i32 %limit, 42
%orig = load i64 *%ptr
%res = select i1 %cond, i64 %orig, i64 %alt
store i64 %res, i64 *%ptr
ret void
}
; ...and with the loaded value second. Here the select keeps the loaded value
; when the comparison is false, so the store only matters when %limit < 42 and
; the expected branch around the STG is the inverted one (JNL).
define void @f2(i64 *%ptr, i64 %alt, i32 %limit) {
; CHECK: f2:
; CHECK-NOT: %r2
; CHECK: jnl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: stg %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
%cond = icmp ult i32 %limit, 42
%orig = load i64 *%ptr
%res = select i1 %cond, i64 %alt, i64 %orig
store i64 %res, i64 *%ptr
ret void
}
; Check the high end of the aligned STG range.
; Element 65535 of an i64 array is byte offset 524280, which the expected
; output folds directly into the STG displacement.
define void @f3(i64 *%base, i64 %alt, i32 %limit) {
; CHECK: f3:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: stg %r3, 524280(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
%ptr = getelementptr i64 *%base, i64 65535
%cond = icmp ult i32 %limit, 42
%orig = load i64 *%ptr
%res = select i1 %cond, i64 %orig, i64 %alt
store i64 %res, i64 *%ptr
ret void
}
; Check the next doubleword up, which needs separate address logic.
; Other sequences besides this one would be OK.
; Element 65536 of an i64 array is byte offset 524288. The expected output
; adds that to the base with AGFI after the branch and then uses STG with a
; zero displacement.
define void @f4(i64 *%base, i64 %alt, i32 %limit) {
; CHECK: f4:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: agfi %r2, 524288
; CHECK: stg %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
%ptr = getelementptr i64 *%base, i64 65536
%cond = icmp ult i32 %limit, 42
%orig = load i64 *%ptr
%res = select i1 %cond, i64 %orig, i64 %alt
store i64 %res, i64 *%ptr
ret void
}
; Check the low end of the STG range.
; Element -65536 of an i64 array is byte offset -524288, which the expected
; output folds into the STG displacement.
define void @f5(i64 *%base, i64 %alt, i32 %limit) {
; CHECK: f5:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: stg %r3, -524288(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
%ptr = getelementptr i64 *%base, i64 -65536
%cond = icmp ult i32 %limit, 42
%orig = load i64 *%ptr
%res = select i1 %cond, i64 %orig, i64 %alt
store i64 %res, i64 *%ptr
ret void
}
; Check the next doubleword down, which needs separate address logic.
; Other sequences besides this one would be OK.
; Element -65537 of an i64 array is byte offset -524296. The expected output
; adds that to the base with AGFI after the branch and then uses STG with a
; zero displacement.
define void @f6(i64 *%base, i64 %alt, i32 %limit) {
; CHECK: f6:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: agfi %r2, -524296
; CHECK: stg %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
%ptr = getelementptr i64 *%base, i64 -65537
%cond = icmp ult i32 %limit, 42
%orig = load i64 *%ptr
%res = select i1 %cond, i64 %orig, i64 %alt
store i64 %res, i64 *%ptr
ret void
}
; Check that STG allows an index.
; The address is built as %base + %index + 524287 via integer adds and
; inttoptr; the expected output folds all three parts into one STG operand.
define void @f7(i64 %base, i64 %index, i64 %alt, i32 %limit) {
; CHECK: f7:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: stg %r4, 524287(%r3,%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
%add1 = add i64 %base, %index
%add2 = add i64 %add1, 524287
%ptr = inttoptr i64 %add2 to i64 *
%cond = icmp ult i32 %limit, 42
%orig = load i64 *%ptr
%res = select i1 %cond, i64 %orig, i64 %alt
store i64 %res, i64 *%ptr
ret void
}
; Check that volatile loads are not matched.
; The volatile load must stay, so the expected output has an explicit LG
; first, then a branch in either direction, and an STG placed after the branch
; target (i.e. a store that is not branched around).
define void @f8(i64 *%ptr, i64 %alt, i32 %limit) {
; CHECK: f8:
; CHECK: lg {{%r[0-5]}}, 0(%r2)
; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]]
; CHECK: [[LABEL]]:
; CHECK: stg {{%r[0-5]}}, 0(%r2)
; CHECK: br %r14
%cond = icmp ult i32 %limit, 42
%orig = load volatile i64 *%ptr
%res = select i1 %cond, i64 %orig, i64 %alt
store i64 %res, i64 *%ptr
ret void
}
; ...likewise stores. In this case we should have a conditional load into %r3.
; The store is volatile, so it cannot be turned into a conditional store and
; must always be emitted. The expected output instead skips the LG with a JNL
; when the select picks %alt (%limit >= 42, unsigned) and then always does STG.
define void @f9(i64 *%ptr, i64 %alt, i32 %limit) {
; CHECK: f9:
; CHECK: jnl [[LABEL:[^ ]*]]
; CHECK: lg %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: stg %r3, 0(%r2)
; CHECK: br %r14
%cond = icmp ult i32 %limit, 42
%orig = load i64 *%ptr
%res = select i1 %cond, i64 %orig, i64 %alt
store volatile i64 %res, i64 *%ptr
ret void
}
; Check that atomic loads are not matched. The transformation is OK for
; the "unordered" case tested here, but since we don't try to handle atomic
; operations at all in this context, it seems better to assert that than
; to restrict the test to a stronger ordering.
; The expected output keeps the atomic load (currently emitted as CSG, see the
; FIXME below), followed by a branch in either direction and an STG that sits
; after the branch target.
define void @f10(i64 *%ptr, i64 %alt, i32 %limit) {
; FIXME: should use a normal load instead of CSG.
; CHECK: f10:
; CHECK: csg {{%r[0-5]}}, {{%r[0-5]}}, 0(%r2)
; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]]
; CHECK: [[LABEL]]:
; CHECK: stg {{%r[0-5]}}, 0(%r2)
; CHECK: br %r14
%cond = icmp ult i32 %limit, 42
%orig = load atomic i64 *%ptr unordered, align 8
%res = select i1 %cond, i64 %orig, i64 %alt
store i64 %res, i64 *%ptr
ret void
}
; ...likewise stores: an atomic store must not be turned into a conditional
; store. The expected output conditionally loads the original value into %r3
; (JNL skips the LG) and then performs the atomic store after the branch
; target; that store is currently emitted as CSG (see the FIXME).
define void @f11(i64 *%ptr, i64 %alt, i32 %limit) {
; FIXME: should use a normal store instead of CSG.
; CHECK: f11:
; CHECK: jnl [[LABEL:[^ ]*]]
; CHECK: lg %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: csg {{%r[0-5]}}, %r3, 0(%r2)
; CHECK: br %r14
%cond = icmp ult i32 %limit, 42
%orig = load i64 *%ptr
%res = select i1 %cond, i64 %orig, i64 %alt
store atomic i64 %res, i64 *%ptr unordered, align 8
ret void
}
; Try a frame index base. The slot comes from an alloca and is passed to @foo
; before and after the load/select/store sequence. The expected output has no
; reference to %r15 between the first call and the JL, nor between the JL and
; the store, and the STG itself addresses the slot directly off %r15.
define void @f12(i64 %alt, i32 %limit) {
; CHECK: f12:
; CHECK: brasl %r14, foo@PLT
; CHECK-NOT: %r15
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r15
; CHECK: stg {{%r[0-9]+}}, {{[0-9]+}}(%r15)
; CHECK: [[LABEL]]:
; CHECK: brasl %r14, foo@PLT
; CHECK: br %r14
%ptr = alloca i64
call void @foo(i64 *%ptr)
%cond = icmp ult i32 %limit, 42
%orig = load i64 *%ptr
%res = select i1 %cond, i64 %orig, i64 %alt
store i64 %res, i64 *%ptr
call void @foo(i64 *%ptr)
ret void
}

View File

@ -0,0 +1,213 @@
; Test f32 conditional stores that are presented as selects.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
declare void @foo(float *)
; Test with the loaded value first.
; The select keeps the loaded value when %limit < 42 (unsigned), so the store
; only matters when the comparison is false. The expected output is a JL that
; branches around an STE of %f0, with no use of %r2 before the store.
define void @f1(float *%ptr, float %alt, i32 %limit) {
; CHECK: f1:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: ste %f0, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
%cond = icmp ult i32 %limit, 42
%orig = load float *%ptr
%res = select i1 %cond, float %orig, float %alt
store float %res, float *%ptr
ret void
}
; ...and with the loaded value second. Here the select keeps the loaded value
; when the comparison is false, so the store only matters when %limit < 42 and
; the expected branch around the STE is the inverted one (JNL).
define void @f2(float *%ptr, float %alt, i32 %limit) {
; CHECK: f2:
; CHECK-NOT: %r2
; CHECK: jnl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: ste %f0, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
%cond = icmp ult i32 %limit, 42
%orig = load float *%ptr
%res = select i1 %cond, float %alt, float %orig
store float %res, float *%ptr
ret void
}
; Check the high end of the aligned STE range.
; Element 1023 of a float array is byte offset 4092, which the expected output
; folds directly into the STE displacement.
define void @f3(float *%base, float %alt, i32 %limit) {
; CHECK: f3:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: ste %f0, 4092(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
%ptr = getelementptr float *%base, i64 1023
%cond = icmp ult i32 %limit, 42
%orig = load float *%ptr
%res = select i1 %cond, float %orig, float %alt
store float %res, float *%ptr
ret void
}
; Check the next word up, which should use STEY instead of STE.
; Element 1024 of a float array is byte offset 4096.
define void @f4(float *%base, float %alt, i32 %limit) {
; CHECK: f4:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: stey %f0, 4096(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
%ptr = getelementptr float *%base, i64 1024
%cond = icmp ult i32 %limit, 42
%orig = load float *%ptr
%res = select i1 %cond, float %orig, float %alt
store float %res, float *%ptr
ret void
}
; Check the high end of the aligned STEY range.
; Element 131071 of a float array is byte offset 524284, which the expected
; output still folds into the STEY displacement.
define void @f5(float *%base, float %alt, i32 %limit) {
; CHECK: f5:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: stey %f0, 524284(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
%ptr = getelementptr float *%base, i64 131071
%cond = icmp ult i32 %limit, 42
%orig = load float *%ptr
%res = select i1 %cond, float %orig, float %alt
store float %res, float *%ptr
ret void
}
; Check the next word up, which needs separate address logic.
; Other sequences besides this one would be OK.
; Element 131072 of a float array is byte offset 524288. The expected output
; adds that to the base with AGFI after the branch and then uses STE with a
; zero displacement.
define void @f6(float *%base, float %alt, i32 %limit) {
; CHECK: f6:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: agfi %r2, 524288
; CHECK: ste %f0, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
%ptr = getelementptr float *%base, i64 131072
%cond = icmp ult i32 %limit, 42
%orig = load float *%ptr
%res = select i1 %cond, float %orig, float %alt
store float %res, float *%ptr
ret void
}
; Check the low end of the STEY range.
; Element -131072 of a float array is byte offset -524288, which the expected
; output folds into the STEY displacement.
define void @f7(float *%base, float %alt, i32 %limit) {
; CHECK: f7:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: stey %f0, -524288(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
%ptr = getelementptr float *%base, i64 -131072
%cond = icmp ult i32 %limit, 42
%orig = load float *%ptr
%res = select i1 %cond, float %orig, float %alt
store float %res, float *%ptr
ret void
}
; Check the next word down, which needs separate address logic.
; Other sequences besides this one would be OK.
; Element -131073 of a float array is byte offset -524292. The expected output
; adds that to the base with AGFI after the branch and then uses STE with a
; zero displacement.
define void @f8(float *%base, float %alt, i32 %limit) {
; CHECK: f8:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: agfi %r2, -524292
; CHECK: ste %f0, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
%ptr = getelementptr float *%base, i64 -131073
%cond = icmp ult i32 %limit, 42
%orig = load float *%ptr
%res = select i1 %cond, float %orig, float %alt
store float %res, float *%ptr
ret void
}
; Check that STEY allows an index.
; The address is built as %base + %index + 4096 via integer adds and inttoptr;
; the expected output folds all three parts into one STEY operand.
define void @f9(i64 %base, i64 %index, float %alt, i32 %limit) {
; CHECK: f9:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: stey %f0, 4096(%r3,%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
%add1 = add i64 %base, %index
%add2 = add i64 %add1, 4096
%ptr = inttoptr i64 %add2 to float *
%cond = icmp ult i32 %limit, 42
%orig = load float *%ptr
%res = select i1 %cond, float %orig, float %alt
store float %res, float *%ptr
ret void
}
; Check that volatile loads are not matched.
; The volatile load must stay, so the expected output has an explicit LE
; first, then a branch in either direction, and an STE placed after the branch
; target (i.e. a store that is not branched around).
define void @f10(float *%ptr, float %alt, i32 %limit) {
; CHECK: f10:
; CHECK: le {{%f[0-5]}}, 0(%r2)
; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]]
; CHECK: [[LABEL]]:
; CHECK: ste {{%f[0-5]}}, 0(%r2)
; CHECK: br %r14
%cond = icmp ult i32 %limit, 42
%orig = load volatile float *%ptr
%res = select i1 %cond, float %orig, float %alt
store float %res, float *%ptr
ret void
}
; ...likewise stores. In this case we should have a conditional load into %f0.
; The store is volatile, so it cannot be turned into a conditional store and
; must always be emitted. The expected output instead skips the LE with a JNL
; when the select picks %alt (%limit >= 42, unsigned) and then always does STE.
define void @f11(float *%ptr, float %alt, i32 %limit) {
; CHECK: f11:
; CHECK: jnl [[LABEL:[^ ]*]]
; CHECK: le %f0, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: ste %f0, 0(%r2)
; CHECK: br %r14
%cond = icmp ult i32 %limit, 42
%orig = load float *%ptr
%res = select i1 %cond, float %orig, float %alt
store volatile float %res, float *%ptr
ret void
}
; Try a frame index base. The slot comes from an alloca and is passed to @foo
; before and after the load/select/store sequence. The expected output has no
; reference to %r15 between the first call and the JL, nor between the JL and
; the store, and the STE itself addresses the slot directly off %r15.
define void @f12(float %alt, i32 %limit) {
; CHECK: f12:
; CHECK: brasl %r14, foo@PLT
; CHECK-NOT: %r15
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r15
; CHECK: ste {{%f[0-9]+}}, {{[0-9]+}}(%r15)
; CHECK: [[LABEL]]:
; CHECK: brasl %r14, foo@PLT
; CHECK: br %r14
%ptr = alloca float
call void @foo(float *%ptr)
%cond = icmp ult i32 %limit, 42
%orig = load float *%ptr
%res = select i1 %cond, float %orig, float %alt
store float %res, float *%ptr
call void @foo(float *%ptr)
ret void
}

View File

@ -0,0 +1,213 @@
; Test f64 conditional stores that are presented as selects.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
declare void @foo(double *)
; Test with the loaded value first.
; The select keeps the loaded value when %limit < 42 (unsigned), so the store
; only matters when the comparison is false. The expected output is a JL that
; branches around an STD of %f0, with no use of %r2 before the store.
define void @f1(double *%ptr, double %alt, i32 %limit) {
; CHECK: f1:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: std %f0, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
%cond = icmp ult i32 %limit, 42
%orig = load double *%ptr
%res = select i1 %cond, double %orig, double %alt
store double %res, double *%ptr
ret void
}
; ...and with the loaded value second. Here the select keeps the loaded value
; when the comparison is false, so the store only matters when %limit < 42 and
; the expected branch around the STD is the inverted one (JNL).
define void @f2(double *%ptr, double %alt, i32 %limit) {
; CHECK: f2:
; CHECK-NOT: %r2
; CHECK: jnl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: std %f0, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
%cond = icmp ult i32 %limit, 42
%orig = load double *%ptr
%res = select i1 %cond, double %alt, double %orig
store double %res, double *%ptr
ret void
}
; Check the high end of the aligned STD range.
; Element 511 of a double array is byte offset 4088, which the expected output
; folds directly into the STD displacement.
define void @f3(double *%base, double %alt, i32 %limit) {
; CHECK: f3:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: std %f0, 4088(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
%ptr = getelementptr double *%base, i64 511
%cond = icmp ult i32 %limit, 42
%orig = load double *%ptr
%res = select i1 %cond, double %orig, double %alt
store double %res, double *%ptr
ret void
}
; Check the next doubleword up, which should use STDY instead of STD.
; Element 512 of a double array is byte offset 4096.
define void @f4(double *%base, double %alt, i32 %limit) {
; CHECK: f4:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: stdy %f0, 4096(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
%ptr = getelementptr double *%base, i64 512
%cond = icmp ult i32 %limit, 42
%orig = load double *%ptr
%res = select i1 %cond, double %orig, double %alt
store double %res, double *%ptr
ret void
}
; Check the high end of the aligned STDY range.
; Element 65535 of a double array is byte offset 524280, which the expected
; output still folds into the STDY displacement.
define void @f5(double *%base, double %alt, i32 %limit) {
; CHECK: f5:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: stdy %f0, 524280(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
%ptr = getelementptr double *%base, i64 65535
%cond = icmp ult i32 %limit, 42
%orig = load double *%ptr
%res = select i1 %cond, double %orig, double %alt
store double %res, double *%ptr
ret void
}
; Check the next doubleword up, which needs separate address logic.
; Other sequences besides this one would be OK.
; Element 65536 of a double array is byte offset 524288. The expected output
; adds that to the base with AGFI after the branch and then uses STD with a
; zero displacement.
define void @f6(double *%base, double %alt, i32 %limit) {
; CHECK: f6:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: agfi %r2, 524288
; CHECK: std %f0, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
%ptr = getelementptr double *%base, i64 65536
%cond = icmp ult i32 %limit, 42
%orig = load double *%ptr
%res = select i1 %cond, double %orig, double %alt
store double %res, double *%ptr
ret void
}
; Check the low end of the STDY range.
; Element -65536 of a double array is byte offset -524288, which the expected
; output folds into the STDY displacement.
define void @f7(double *%base, double %alt, i32 %limit) {
; CHECK: f7:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: stdy %f0, -524288(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
%ptr = getelementptr double *%base, i64 -65536
%cond = icmp ult i32 %limit, 42
%orig = load double *%ptr
%res = select i1 %cond, double %orig, double %alt
store double %res, double *%ptr
ret void
}
; Check the next doubleword down, which needs separate address logic.
; Other sequences besides this one would be OK.
; Element -65537 of a double array is byte offset -524296. The expected output
; adds that to the base with AGFI after the branch and then uses STD with a
; zero displacement.
define void @f8(double *%base, double %alt, i32 %limit) {
; CHECK: f8:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: agfi %r2, -524296
; CHECK: std %f0, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
%ptr = getelementptr double *%base, i64 -65537
%cond = icmp ult i32 %limit, 42
%orig = load double *%ptr
%res = select i1 %cond, double %orig, double %alt
store double %res, double *%ptr
ret void
}
; Check that STDY allows an index.
; The address is built as %base + %index + 524287 via integer adds and
; inttoptr; the expected output folds all three parts into one STDY operand.
define void @f9(i64 %base, i64 %index, double %alt, i32 %limit) {
; CHECK: f9:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: stdy %f0, 524287(%r3,%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
%add1 = add i64 %base, %index
%add2 = add i64 %add1, 524287
%ptr = inttoptr i64 %add2 to double *
%cond = icmp ult i32 %limit, 42
%orig = load double *%ptr
%res = select i1 %cond, double %orig, double %alt
store double %res, double *%ptr
ret void
}
; Check that volatile loads are not matched.
; The volatile load must stay, so the expected output has an explicit LD
; first, then a branch in either direction, and an STD placed after the branch
; target (i.e. a store that is not branched around).
define void @f10(double *%ptr, double %alt, i32 %limit) {
; CHECK: f10:
; CHECK: ld {{%f[0-5]}}, 0(%r2)
; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]]
; CHECK: [[LABEL]]:
; CHECK: std {{%f[0-5]}}, 0(%r2)
; CHECK: br %r14
%cond = icmp ult i32 %limit, 42
%orig = load volatile double *%ptr
%res = select i1 %cond, double %orig, double %alt
store double %res, double *%ptr
ret void
}
; ...likewise stores. In this case we should have a conditional load into %f0.
; The store is volatile, so it cannot be turned into a conditional store and
; must always be emitted. The expected output instead skips the LD with a JNL
; when the select picks %alt (%limit >= 42, unsigned) and then always does STD.
define void @f11(double *%ptr, double %alt, i32 %limit) {
; CHECK: f11:
; CHECK: jnl [[LABEL:[^ ]*]]
; CHECK: ld %f0, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: std %f0, 0(%r2)
; CHECK: br %r14
%cond = icmp ult i32 %limit, 42
%orig = load double *%ptr
%res = select i1 %cond, double %orig, double %alt
store volatile double %res, double *%ptr
ret void
}
; Try a frame index base. The slot comes from an alloca and is passed to @foo
; before and after the load/select/store sequence. The expected output has no
; reference to %r15 between the first call and the JL, nor between the JL and
; the store, and the STD itself addresses the slot directly off %r15.
define void @f12(double %alt, i32 %limit) {
; CHECK: f12:
; CHECK: brasl %r14, foo@PLT
; CHECK-NOT: %r15
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r15
; CHECK: std {{%f[0-9]+}}, {{[0-9]+}}(%r15)
; CHECK: [[LABEL]]:
; CHECK: brasl %r14, foo@PLT
; CHECK: br %r14
%ptr = alloca double
call void @foo(double *%ptr)
%cond = icmp ult i32 %limit, 42
%orig = load double *%ptr
%res = select i1 %cond, double %orig, double %alt
store double %res, double *%ptr
call void @foo(double *%ptr)
ret void
}