1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 20:51:52 +01:00
Jay Foad bd8f1be276 [PowerPC] Custom lowering for funnel shifts
The custom lowering saves an instruction over the generic expansion, by
taking advantage of the fact that PowerPC shift instructions are well
defined in the shift-by-bitwidth case.

Differential Revision: https://reviews.llvm.org/D83948
2020-08-04 16:30:49 +01:00

64 lines
2.5 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s
; Aggregate types referenced by the test function.
; %struct.m.2.5.8.11 has three fields: a %struct.l.0.3.6.9, a [7 x i8] array,
; and a %struct.a.1.4.7.10 (field index 2, the one the test loads from).
%struct.m.2.5.8.11 = type { %struct.l.0.3.6.9, [7 x i8], %struct.a.1.4.7.10 }
; %struct.l.0.3.6.9 wraps a single i8; it is also the pointee type of the
; first parameter of @_ZN1llsE1d.
%struct.l.0.3.6.9 = type { i8 }
; %struct.a.1.4.7.10 is 27 bytes, a zero-length i32 array, then 4 more bytes.
; The test indexes element 0 of the zero-length i32 array (field index 1).
%struct.a.1.4.7.10 = type { [27 x i8], [0 x i32], [4 x i8] }
; Test function. It reads field 2 of %this through an i216 pointer, shifts the
; value right by 4 and left by 31, truncates it to i32, and passes the result
; to @_ZN1llsE1d twice: first OR'ed with bit 0 of an i32 loaded from inside the
; same field, then on its own after reloading the i216.
; The assertions that follow the signature are autogenerated (see the NOTE at
; the top of the file) and pin the exact instruction sequence expected from
; llc; regenerate them with utils/update_llc_test_checks.py instead of editing
; them by hand.
define void @_ZN1m1nEv(%struct.m.2.5.8.11* %this) local_unnamed_addr nounwind align 2 {
; CHECK-LABEL: _ZN1m1nEv:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mflr r0
; CHECK-NEXT: std r29, -24(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r0, 16(r1)
; CHECK-NEXT: stdu r1, -64(r1)
; CHECK-NEXT: mr r30, r3
; CHECK-NEXT: li r3, 4
; CHECK-NEXT: ld r4, 16(r30)
; CHECK-NEXT: ld r5, 8(r30)
; CHECK-NEXT: subfic r29, r3, 64
; CHECK-NEXT: rldicl r3, r5, 60, 4
; CHECK-NEXT: sld r4, r4, r29
; CHECK-NEXT: lwz r5, 36(r30)
; CHECK-NEXT: or r3, r4, r3
; CHECK-NEXT: rlwinm r3, r3, 31, 0, 0
; CHECK-NEXT: clrlwi r4, r5, 31
; CHECK-NEXT: or r4, r4, r3
; CHECK-NEXT: bl _ZN1llsE1d
; CHECK-NEXT: nop
; CHECK-NEXT: ld r3, 16(r30)
; CHECK-NEXT: ld r4, 8(r30)
; CHECK-NEXT: rldicl r4, r4, 60, 4
; CHECK-NEXT: sld r3, r3, r29
; CHECK-NEXT: or r3, r3, r4
; CHECK-NEXT: sldi r3, r3, 31
; CHECK-NEXT: clrldi r4, r3, 32
; CHECK-NEXT: bl _ZN1llsE1d
; CHECK-NEXT: nop
; CHECK-NEXT: addi r1, r1, 64
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
entry:
; %bc addresses field 2 of %this (the %struct.a.1.4.7.10 member); the bitcast
; reinterprets that member as a single 216-bit integer.
%bc = getelementptr inbounds %struct.m.2.5.8.11, %struct.m.2.5.8.11* %this, i64 0, i32 2
%0 = bitcast %struct.a.1.4.7.10* %bc to i216*
%bf.load = load i216, i216* %0, align 8
; lshr 4, shl 31, then trunc to i32: bits 0..30 of the result are zero and
; bit 31 carries bit 4 of the loaded i216.
%bf.lshr = lshr i216 %bf.load, 4
%shl.i23 = shl i216 %bf.lshr, 31
%shl.i = trunc i216 %shl.i23 to i32
; Element 0 of the zero-length i32 array inside field 2 (the expected assembly
; reads it with lwz at offset 36 from %this).
%arrayidx = getelementptr inbounds %struct.m.2.5.8.11, %struct.m.2.5.8.11* %this, i64 0, i32 2, i32 1, i64 0
%1 = load i32, i32* %arrayidx, align 4
; Keep only bit 0 of that word and merge it with the shifted value.
%and.i = and i32 %1, 1
%or.i = or i32 %and.i, %shl.i
; First call: pointer argument is undef, i32 argument is the merged value.
tail call void @_ZN1llsE1d(%struct.l.0.3.6.9* undef, i32 %or.i) #1
; The i216 is loaded again after the call and put through the same
; lshr/shl/trunc sequence.
%bf.load10 = load i216, i216* %0, align 8
%bf.lshr11 = lshr i216 %bf.load10, 4
%shl.i1524 = shl i216 %bf.lshr11, 31
%shl.i15 = trunc i216 %shl.i1524 to i32
; Second call: passes the shifted value directly, without the OR.
tail call void @_ZN1llsE1d(%struct.l.0.3.6.9* undef, i32 %shl.i15) #1
ret void
}
; External callee invoked twice by @_ZN1m1nEv; only its declaration is given,
; so the calls cannot be inlined away by llc.
; NOTE(review): attribute groups #0 and #1 are referenced but no definition is
; visible in this part of the file - confirm whether one exists further down.
declare void @_ZN1llsE1d(%struct.l.0.3.6.9*, i32) local_unnamed_addr #0