1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 19:23:23 +01:00

[BPF] Teach DAG2DAG AND elimination about load intrinsics

As commented on the existing code:

  // The Reg operand should be a virtual register, which is defined
  // outside the current basic block. DAG combiner has done a pretty
  // good job in removing truncating inside a single basic block.

However, when the Reg operand comes from the bpf_load_[byte | half | word]
intrinsics, the generic optimizer doesn't understand that their results are
zero-extended, so these single-basic-block elimination opportunities were
missed.

Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
llvm-svn: 322534
This commit is contained in:
Yonghong Song 2018-01-16 07:27:19 +00:00
parent 98e064bb0e
commit 4f63cbd37f
2 changed files with 89 additions and 7 deletions

View File

@ -519,6 +519,37 @@ void BPFDAGToDAGISel::PreprocessTrunc(SDNode *Node,
if (!MaskN)
return;
// The Reg operand should be a virtual register, which is defined
// outside the current basic block. DAG combiner has done a pretty
// good job in removing truncating inside a single basic block except
// when the Reg operand comes from bpf_load_[byte | half | word] for
// which the generic optimizer doesn't understand their results are
// zero extended.
SDValue BaseV = Node->getOperand(0);
if (BaseV.getOpcode() == ISD::INTRINSIC_W_CHAIN) {
unsigned IntNo = cast<ConstantSDNode>(BaseV->getOperand(1))->getZExtValue();
uint64_t MaskV = MaskN->getZExtValue();
if (!((IntNo == Intrinsic::bpf_load_byte && MaskV == 0xFF) ||
(IntNo == Intrinsic::bpf_load_half && MaskV == 0xFFFF) ||
(IntNo == Intrinsic::bpf_load_word && MaskV == 0xFFFFFFFF)))
return;
DEBUG(dbgs() << "Remove the redundant AND operation in: "; Node->dump();
dbgs() << '\n');
I--;
CurDAG->ReplaceAllUsesWith(SDValue(Node, 0), BaseV);
I++;
CurDAG->DeleteNode(Node);
return;
}
// Multiple basic blocks case.
if (BaseV.getOpcode() != ISD::CopyFromReg)
return;
unsigned match_load_op = 0;
switch (MaskN->getZExtValue()) {
default:
@ -534,13 +565,6 @@ void BPFDAGToDAGISel::PreprocessTrunc(SDNode *Node,
break;
}
// The Reg operand should be a virtual register, which is defined
// outside the current basic block. DAG combiner has done a pretty
// good job in removing truncating inside a single basic block.
SDValue BaseV = Node->getOperand(0);
if (BaseV.getOpcode() != ISD::CopyFromReg)
return;
const RegisterSDNode *RegN =
dyn_cast<RegisterSDNode>(BaseV.getNode()->getOperand(1));
if (!RegN || !TargetRegisterInfo::isVirtualRegister(RegN->getReg()))

View File

@ -0,0 +1,58 @@
; RUN: llc < %s -march=bpf -verify-machineinstrs | FileCheck %s
; Source code:
;struct __sk_buff;
;unsigned long long
;load_byte(void *skb, unsigned long long off) asm("llvm.bpf.load.byte");
;unsigned long long
;load_half(void *skb, unsigned long long off) asm("llvm.bpf.load.half");
;unsigned long long
;load_word(void *skb, unsigned long long off) asm("llvm.bpf.load.word");
;typedef unsigned char uint8_t;
;typedef unsigned short uint16_t;
;typedef unsigned int uint32_t;
;
;int func_b(struct __sk_buff *skb)
;{
; uint8_t t = load_byte(skb, 0);
; return t;
;}
;
;int func_h(struct __sk_buff *skb)
;{
; uint16_t t = load_half(skb, 0);
; return t;
;}
;
;int func_w(struct __sk_buff *skb)
;{
; uint32_t t = load_word(skb, 0);
; return t;
;}
%struct.__sk_buff = type opaque
; Function Attrs: nounwind readonly
; func_b: calls llvm.bpf.load.byte on %skb at offset 0, truncates the i64
; result to i32, masks it with 255 and returns the masked value.
; The function exists to present the "load intrinsic followed by AND" pattern
; to the BPF backend so the test can verify the AND does not survive.
define i32 @func_b(%struct.__sk_buff* %skb) local_unnamed_addr #0 {
entry:
; The intrinsic takes an i8* argument, so the opaque struct pointer is cast.
%0 = bitcast %struct.__sk_buff* %skb to i8*
%call = tail call i64 @llvm.bpf.load.byte(i8* %0, i64 0)
%conv = trunc i64 %call to i32
; Mask with 255 (0xFF): the byte-sized mask applied to the byte load result.
%conv1 = and i32 %conv, 255
; Negative match: the llc output must not contain an AND of r0 with 255.
; CHECK-NOT: r0 &= 255
ret i32 %conv1
}
; Function Attrs: nounwind readonly
declare i64 @llvm.bpf.load.byte(i8*, i64) #1
; Function Attrs: nounwind readonly
; func_h: calls llvm.bpf.load.half on %skb at offset 0, truncates the i64
; result to i32, masks it with 65535 and returns the masked value.
; The function exists to present the "load intrinsic followed by AND" pattern
; to the BPF backend so the test can verify the AND does not survive.
define i32 @func_h(%struct.__sk_buff* %skb) local_unnamed_addr #0 {
entry:
; The intrinsic takes an i8* argument, so the opaque struct pointer is cast.
%0 = bitcast %struct.__sk_buff* %skb to i8*
%call = tail call i64 @llvm.bpf.load.half(i8* %0, i64 0)
%conv = trunc i64 %call to i32
; Mask with 65535 (0xFFFF): the half-word mask applied to the half load result.
%conv1 = and i32 %conv, 65535
; Negative match: the llc output must not contain an AND of r0 with 65535.
; CHECK-NOT: r0 &= 65535
ret i32 %conv1
}
; Function Attrs: nounwind readonly
declare i64 @llvm.bpf.load.half(i8*, i64) #1