commit 1efbe554a0
One of the current peephole optimizations is to remove SLL/SRL if the sub-register has been zero extended. This pass has two bugs and one limitation.

First, for a physical subregister used in a pseudo COPY instruction like below, it permits an incorrect optimization:

  %0:gpr32 = COPY $w0
  ...
  %4:gpr = MOV_32_64 %0:gpr32
  %5:gpr = SLL_ri %4:gpr(tied-def 0), 32
  %6:gpr = SRA_ri %5:gpr(tied-def 0), 32

$w0 could hold the return value of a previous function call, and its upper 32 bits might contain non-zero values. The same applies to function arguments.

Second, the current code may permit removing SLL/SRA like below:

  %0:gpr32 = COPY $w0
  %1:gpr32 = COPY %0:gpr32
  ...
  %4:gpr = MOV_32_64 %1:gpr32
  %5:gpr = SLL_ri %4:gpr(tied-def 0), 32
  %6:gpr = SRA_ri %5:gpr(tied-def 0), 32

This happens because the code does not follow the def-use chain to skip all intermediate 32-bit-to-32-bit COPY instructions.

Besides these bugs, the current implementation is also very conservative for PHI instructions: if any PHI operand is defined by another PHI or COPY instruction, the SLL/SRA are simply kept.

This patch fixes the issues as follows:
 - During def/use chain traversal, if any physical register is read, the SLL/SRA are preserved, since such physical registers are mostly function return values or arguments of the current function.
 - All COPY and PHI instructions are visited recursively.
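The fix, in outline, is a small mutual recursion over the def-use chain. The sketch below is illustrative only: the helper names (isCopyFrom32Def, isPhiFrom32Def, isInsnFrom32Def) and their signatures are assumptions for this write-up, not necessarily the exact code in the patch, and a production version would also need to guard against PHI cycles. What it shows is the two fixes above: a physical-register read aborts the optimization, and COPY/PHI defs are followed recursively.

  #include "llvm/CodeGen/MachineInstr.h"
  #include "llvm/CodeGen/MachineRegisterInfo.h"

  using namespace llvm;

  static bool isInsnFrom32Def(MachineRegisterInfo *MRI, MachineInstr *DefInsn);

  // A COPY is safe only if its source is a *virtual* register whose own
  // definition is safe. A physical-register source (e.g. $w0) aborts:
  // the value may be a call return or a function argument whose upper
  // 32 bits are unknown.
  static bool isCopyFrom32Def(MachineRegisterInfo *MRI, MachineInstr *CopyMI) {
    const MachineOperand &Src = CopyMI->getOperand(1);
    if (!Src.isReg() || !Src.getReg().isVirtual())
      return false;
    return isInsnFrom32Def(MRI, MRI->getVRegDef(Src.getReg()));
  }

  // For a PHI, every incoming value must trace back to a safe 32-bit
  // definition. PHI operands come in (value, predecessor-block) pairs,
  // starting at operand 1.
  static bool isPhiFrom32Def(MachineRegisterInfo *MRI, MachineInstr *PhiMI) {
    for (unsigned i = 1, e = PhiMI->getNumOperands(); i < e; i += 2) {
      const MachineOperand &Op = PhiMI->getOperand(i);
      if (!Op.isReg() || !Op.getReg().isVirtual())
        return false;
      if (!isInsnFrom32Def(MRI, MRI->getVRegDef(Op.getReg())))
        return false;
    }
    return true;
  }

  // Dispatch: instead of giving up on COPY and PHI, recurse through
  // them. Any other def of a gpr32 vreg is a real 32-bit instruction,
  // which zero-extends into the 64-bit register under -mattr=+alu32.
  static bool isInsnFrom32Def(MachineRegisterInfo *MRI, MachineInstr *DefInsn) {
    if (!DefInsn)
      return false;
    if (DefInsn->isCopy())
      return isCopyFrom32Def(MRI, DefInsn);
    if (DefInsn->isPHI())
      return isPhiFrom32Def(MRI, DefInsn);
    return true;
  }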
; RUN: llc -O2 -march=bpfel -mcpu=v2 -mattr=+alu32 < %s | FileCheck %s
;
; long long select_u(unsigned a, unsigned b, long long c, long long d)
; {
;   if (a > b)
;     return c;
;   else
;     return d;
; }
;
; long long select_u_2(unsigned a, unsigned long long b, long long c, long long d)
; {
;   if (a > b)
;     return c;
;   else
;     return d;
; }
;
; long long select_s(signed a, signed b, long long c, long long d)
; {
;   if (a > b)
;     return c;
;   else
;     return d;
; }
;
; long long bar ();
;
; int foo (int b, int c)
; {
;   unsigned int i32_val = (unsigned int) bar();
;
;   if (i32_val < 10)
;     return b;
;   else
;     return c;
; }
;
; int *inc_p (int *p, unsigned a)
; {
;   return p + a;
; }

; Function Attrs: norecurse nounwind readnone
define dso_local i64 @select_u(i32 %a, i32 %b, i64 %c, i64 %d) local_unnamed_addr #0 {
; CHECK-LABEL: select_u:
entry:
  %cmp = icmp ugt i32 %a, %b
  %c.d = select i1 %cmp, i64 %c, i64 %d
; CHECK: r{{[0-9]+}} <<= 32
; CHECK-NEXT: r{{[0-9]+}} >>= 32
; CHECK: if r{{[0-9]+}} {{<|>}} r{{[0-9]+}} goto
  ret i64 %c.d
}

; Function Attrs: norecurse nounwind readnone
define dso_local i64 @select_u_2(i32 %a, i64 %b, i64 %c, i64 %d) local_unnamed_addr #0 {
; CHECK-LABEL: select_u_2:
entry:
  %conv = zext i32 %a to i64
; CHECK: r{{[0-9]+}} <<= 32
; CHECK-NEXT: r{{[0-9]+}} >>= 32
  %cmp = icmp ugt i64 %conv, %b
  %c.d = select i1 %cmp, i64 %c, i64 %d
  ret i64 %c.d
}

; Function Attrs: norecurse nounwind readnone
define dso_local i64 @select_s(i32 %a, i32 %b, i64 %c, i64 %d) local_unnamed_addr #0 {
; CHECK-LABEL: select_s:
entry:
  %cmp = icmp sgt i32 %a, %b
  %c.d = select i1 %cmp, i64 %c, i64 %d
; CHECK: r{{[0-9]+}} <<= 32
; CHECK-NEXT: r{{[0-9]+}} s>>= 32
; CHECK: if r{{[0-9]+}} s{{<|>}} r{{[0-9]+}} goto
  ret i64 %c.d
}

; Function Attrs: nounwind
define dso_local i32 @foo(i32 %b, i32 %c) local_unnamed_addr #0 {
; CHECK-LABEL: foo:
entry:
  %call = tail call i64 bitcast (i64 (...)* @bar to i64 ()*)() #2
  %conv = trunc i64 %call to i32
  %cmp = icmp ult i32 %conv, 10
; The shifts can't be optimized out because %call comes from a function
; call returning i64, so the high 32 bits might hold valid (non-zero) data.
; CHECK: r{{[0-9]+}} <<= 32
; CHECK-NEXT: r{{[0-9]+}} >>= 32
  %b.c = select i1 %cmp, i32 %b, i32 %c
; CHECK: if r{{[0-9]+}} {{<|>}} {{[0-9]+}} goto
  ret i32 %b.c
}

declare dso_local i64 @bar(...) local_unnamed_addr #1

; Function Attrs: norecurse nounwind readnone
define dso_local i32* @inc_p(i32* readnone %p, i32 %a) local_unnamed_addr #0 {
; CHECK-LABEL: inc_p:
entry:
  %idx.ext = zext i32 %a to i64
; CHECK: r{{[0-9]+}} <<= 32
; CHECK-NEXT: r{{[0-9]+}} >>= 32
  %add.ptr = getelementptr inbounds i32, i32* %p, i64 %idx.ext
  ret i32* %add.ptr
}

define dso_local i32 @test() local_unnamed_addr {
; CHECK-LABEL: test:
entry:
  %call = tail call i32 bitcast (i32 (...)* @helper to i32 ()*)()
  %cmp = icmp sgt i32 %call, 6
; The shifts can't be optimized out because %call comes from a function
; call returning i32, so the high 32 bits of the register might be invalid.
; CHECK: r{{[0-9]+}} <<= 32
; CHECK-NEXT: r{{[0-9]+}} s>>= 32
  %cond = zext i1 %cmp to i32
; CHECK: if r{{[0-9]+}} s{{<|>}} {{[0-9]+}} goto
  ret i32 %cond
}

declare dso_local i32 @helper(...) local_unnamed_addr