[BPF] Teach DAG2DAG AND elimination about load intrinsics

As commented on the existing code:

  // The Reg operand should be a virtual register, which is defined
  // outside the current basic block. DAG combiner has done a pretty
  // good job in removing truncating inside a single basic block.

However, when the Reg operand comes from the bpf_load_[byte | half | word]
intrinsics, the generic optimizer doesn't understand that their results are
zero extended, so these single basic block elimination opportunities were
missed.

Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Jiong Wang <jiong.wang@netronome.com>

llvm-svn: 322534
2024-11-23 19:23:23 +01:00 · 2018-01-16 07:27:19 +00:00 · 2018-01-16 07:27:19 +00:00 · 4f63cbd37f
commit 4f63cbd37f
parent 98e064bb0e
2 changed files with 89 additions and 7 deletions
--- a/lib/Target/BPF/BPFISelDAGToDAG.cpp
+++ b/lib/Target/BPF/BPFISelDAGToDAG.cpp
@@ -519,6 +519,37 @@ void BPFDAGToDAGISel::PreprocessTrunc(SDNode *Node,
  if (!MaskN)
    return;

+  // The Reg operand should be a virtual register, which is defined
+  // outside the current basic block. The DAG combiner already does a
+  // good job of removing truncation inside a single basic block, except
+  // when the Reg operand comes from bpf_load_[byte | half | word]: the
+  // generic optimizer doesn't know that the results of these intrinsics
+  // are zero extended.
+  SDValue BaseV = Node->getOperand(0);
+  if (BaseV.getOpcode() == ISD::INTRINSIC_W_CHAIN) {
+    unsigned IntNo = cast<ConstantSDNode>(BaseV->getOperand(1))->getZExtValue();
+    uint64_t MaskV = MaskN->getZExtValue();
+
+    if (!((IntNo == Intrinsic::bpf_load_byte && MaskV == 0xFF) ||
+          (IntNo == Intrinsic::bpf_load_half && MaskV == 0xFFFF) ||
+          (IntNo == Intrinsic::bpf_load_word && MaskV == 0xFFFFFFFF)))
+      return;
+
+    DEBUG(dbgs() << "Remove the redundant AND operation in: "; Node->dump();
+          dbgs() << '\n');
+
+    I--;
+    CurDAG->ReplaceAllUsesWith(SDValue(Node, 0), BaseV);
+    I++;
+    CurDAG->DeleteNode(Node);
+
+    return;
+  }
+
+  // Multiple basic blocks case.
+  if (BaseV.getOpcode() != ISD::CopyFromReg)
+    return;
+
  unsigned match_load_op = 0;
  switch (MaskN->getZExtValue()) {
  default:
@@ -534,13 +565,6 @@ void BPFDAGToDAGISel::PreprocessTrunc(SDNode *Node,
    break;
  }

-  // The Reg operand should be a virtual register, which is defined
-  // outside the current basic block. DAG combiner has done a pretty
-  // good job in removing truncating inside a single basic block.
-  SDValue BaseV = Node->getOperand(0);
-  if (BaseV.getOpcode() != ISD::CopyFromReg)
-    return;
-
  const RegisterSDNode *RegN =
      dyn_cast<RegisterSDNode>(BaseV.getNode()->getOperand(1));
  if (!RegN || !TargetRegisterInfo::isVirtualRegister(RegN->getReg()))
--- a/test/CodeGen/BPF/remove_truncate_4.ll
+++ b/test/CodeGen/BPF/remove_truncate_4.ll
@@ -0,0 +1,58 @@
+; RUN: llc < %s -march=bpf -verify-machineinstrs | FileCheck %s
+
+; Source code:
+;struct __sk_buff;
+;unsigned long long
+;load_byte(void *skb, unsigned long long off) asm("llvm.bpf.load.byte");
+;unsigned long long
+;load_half(void *skb, unsigned long long off) asm("llvm.bpf.load.half");
+;typedef unsigned char      uint8_t;
+;typedef unsigned short     uint16_t;
+;
+;int func_b(struct __sk_buff *skb)
+;{
+;    uint8_t t = load_byte(skb, 0);
+;    return t;
+;}
+;
+;int func_h(struct __sk_buff *skb)
+;{
+;    uint16_t t = load_half(skb, 0);
+;    return t;
+;}
+;
+;int func_w(struct __sk_buff *skb)
+;{
+;    uint32_t t = load_word(skb, 0);
+;    return t;
+;}
+
+%struct.__sk_buff = type opaque
+
+; Function Attrs: nounwind readonly
+define i32 @func_b(%struct.__sk_buff* %skb) local_unnamed_addr #0 {
+entry:
+  %0 = bitcast %struct.__sk_buff* %skb to i8*
+  %call = tail call i64 @llvm.bpf.load.byte(i8* %0, i64 0)
+  %conv = trunc i64 %call to i32
+  %conv1 = and i32 %conv, 255
+; CHECK-NOT:  r0 &= 255
+  ret i32 %conv1
+}
+
+; Function Attrs: nounwind readonly
+declare i64 @llvm.bpf.load.byte(i8*, i64) #1
+
+; Function Attrs: nounwind readonly
+define i32 @func_h(%struct.__sk_buff* %skb) local_unnamed_addr #0 {
+entry:
+  %0 = bitcast %struct.__sk_buff* %skb to i8*
+  %call = tail call i64 @llvm.bpf.load.half(i8* %0, i64 0)
+  %conv = trunc i64 %call to i32
+  %conv1 = and i32 %conv, 65535
+; CHECK-NOT:  r0 &= 65535
+  ret i32 %conv1
+}
+
+; Function Attrs: nounwind readonly
+declare i64 @llvm.bpf.load.half(i8*, i64) #1