Given a pair of floating point load and store, if there are no other uses of

the load, then it may be legal to transform the load and store to integer load and store of the same width. This is done if the target specified the transformation as profitable. e.g. On arm, this can transform: vldr.32 s0, [] vstr.32 s0, [] to ldr r12, [] str r12, [] rdar://8944252 llvm-svn: 124708
2025-02-01 13:11:39 +01:00 · 2011-02-02 01:06:55 +00:00 · 2011-02-02 01:06:55 +00:00 · c7ce7e2ac3
commit c7ce7e2ac3
parent b6fe71fef9
5 changed files with 109 additions and 2 deletions
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@ -963,6 +963,13 @@ public:
    return isTypeLegal(VT);
  }

+  /// isDesirableToPromoteOp - Return true if it is profitable for dag combiner
+  /// to transform a floating point op of specified opcode to a equivalent op of
+  /// an integer type. e.g. f32 load -> i32 load can be profitable on ARM.
+  virtual bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const {
+    return false;
+  }
+
  /// IsDesirableToPromoteOp - This method query the target whether it is
  /// beneficial for dag combiner to promote the specified node. If true, it
  /// should return the desired promotion type by reference.
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@ -42,6 +42,7 @@ STATISTIC(NodesCombined   , "Number of dag nodes combined");
 STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
 STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
 STATISTIC(OpsNarrowed     , "Number of load/op/store narrowed");
+STATISTIC(LdStFP2Int      , "Number of fp load/store pairs transformed to int");

 namespace {
  static cl::opt<bool>
@ -234,6 +235,7 @@ namespace {
    SDNode *MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL);
    SDValue ReduceLoadWidth(SDNode *N);
    SDValue ReduceLoadOpStoreWidth(SDNode *N);
+    SDValue TransformFPLoadStorePair(SDNode *N);

    SDValue GetDemandedBits(SDValue V, const APInt &Mask);

@ -6111,6 +6113,63 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
  return SDValue();
 }

+/// TransformFPLoadStorePair - For a given floating point load / store pair,
+/// if the load value isn't used by any other operations, then consider
+/// transforming the pair to integer load / store operations if the target
+/// deems the transformation profitable.
+SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
+  StoreSDNode *ST  = cast<StoreSDNode>(N);
+  SDValue Chain = ST->getChain();
+  SDValue Value = ST->getValue();
+  if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
+      Value.hasOneUse() &&
+      Chain == SDValue(Value.getNode(), 1)) {
+    LoadSDNode *LD = cast<LoadSDNode>(Value);
+    EVT VT = LD->getMemoryVT();
+    if (!VT.isFloatingPoint() ||
+        VT != ST->getMemoryVT() ||
+        LD->isNonTemporal() ||
+        ST->isNonTemporal() ||
+        LD->getPointerInfo().getAddrSpace() != 0 ||
+        ST->getPointerInfo().getAddrSpace() != 0)
+      return SDValue();
+
+    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
+    if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
+        !TLI.isOperationLegal(ISD::STORE, IntVT) ||
+        !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
+        !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
+      return SDValue();
+
+    unsigned LDAlign = LD->getAlignment();
+    unsigned STAlign = ST->getAlignment();
+    const Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
+    unsigned ABIAlign = TLI.getTargetData()->getABITypeAlignment(IntVTTy);
+    if (LDAlign < ABIAlign || STAlign < ABIAlign)
+      return SDValue();
+
+    SDValue NewLD = DAG.getLoad(IntVT, Value.getDebugLoc(),
+                                LD->getChain(), LD->getBasePtr(),
+                                LD->getPointerInfo(),
+                                false, false, LDAlign);
+
+    SDValue NewST = DAG.getStore(NewLD.getValue(1), N->getDebugLoc(),
+                                 NewLD, ST->getBasePtr(),
+                                 ST->getPointerInfo(),
+                                 false, false, STAlign);
+
+    AddToWorkList(NewLD.getNode());
+    AddToWorkList(NewST.getNode());
+    WorkListRemover DeadNodes(*this);
+    DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1),
+                                  &DeadNodes);
+    ++LdStFP2Int;
+    return NewST;
+  }
+
+  return SDValue();
+}
+
 SDValue DAGCombiner::visitSTORE(SDNode *N) {
  StoreSDNode *ST  = cast<StoreSDNode>(N);
  SDValue Chain = ST->getChain();
@ -6210,6 +6269,12 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
    }
  }

+  // Try transforming a pair floating point load / store ops to integer
+  // load / store ops.
+  SDValue NewST = TransformFPLoadStorePair(N);
+  if (NewST.getNode())
+    return NewST;
+
  if (CombinerAA) {
    // Walk up chain skipping non-aliasing memory nodes.
    SDValue BetterChain = FindBetterChain(N, Chain);
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@ -5724,6 +5724,11 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
  return SDValue();
 }

+bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc,
+                                                          EVT VT) const {
+  return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE);
+}
+
 bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
  if (!Subtarget->allowsUnalignedMem())
    return false;
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@ -213,14 +213,16 @@ namespace llvm {
    virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
                                    SelectionDAG &DAG) const;

-    virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
-
    virtual const char *getTargetNodeName(unsigned Opcode) const;

    virtual MachineBasicBlock *
      EmitInstrWithCustomInserter(MachineInstr *MI,
                                  MachineBasicBlock *MBB) const;

+    virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
+    bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const;
+
    /// allowsUnalignedMemoryAccesses - Returns true if the target allows
    /// unaligned memory accesses. of the specified type.
    /// FIXME: Add getOptimalMemOpType to implement memcpy with NEON?
--- a/test/CodeGen/ARM/ldst-f32-2-i32.ll
+++ b/test/CodeGen/ARM/ldst-f32-2-i32.ll
@ -0,0 +1,28 @@
+; RUN: llc < %s -mtriple=armv7-apple-darwin -mcpu=cortex-a8 | FileCheck %s
+; Check if the f32 load / store pair are optimized to i32 load / store.
+; rdar://8944252
+
+define void @t(i32 %width, float* nocapture %src, float* nocapture %dst, i32 %index) nounwind {
+; CHECK: t:
+entry:
+  %src6 = bitcast float* %src to i8*
+  %0 = icmp eq i32 %width, 0
+  br i1 %0, label %return, label %bb
+
+bb:
+; CHECK: ldr [[REGISTER:(r[0-9]+)]], [r1], r3
+; CHECK: str [[REGISTER]], [r2], #4
+  %j.05 = phi i32 [ %2, %bb ], [ 0, %entry ]
+  %tmp = mul i32 %j.05, %index
+  %uglygep = getelementptr i8* %src6, i32 %tmp
+  %src_addr.04 = bitcast i8* %uglygep to float*
+  %dst_addr.03 = getelementptr float* %dst, i32 %j.05
+  %1 = load float* %src_addr.04, align 4
+  store float %1, float* %dst_addr.03, align 4
+  %2 = add i32 %j.05, 1
+  %exitcond = icmp eq i32 %2, %width
+  br i1 %exitcond, label %return, label %bb
+
+return:
+  ret void
+}