mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 03:02:36 +01:00
d48de5e046
Reduces scalarization overhead via custom lowering of v2f64 fpext v2f32. E.g., for the following IR: `%0 = load <2 x float>, <2 x float>* %Ptr, align 8; %1 = fpext <2 x float> %0 to <2 x double>; ret <2 x double> %1`. Pre custom lowering: `ld r3, 0(r3); mtvsrd f0, r3; xxswapd vs34, vs0; xscvspdpn f0, vs0; xxsldwi vs1, vs34, vs34, 3; xscvspdpn f1, vs1; xxmrghd vs34, vs0, vs1`. After custom lowering: `lfd f0, 0(r3); xxmrghw vs0, vs0, vs0; xvcvspdp vs34, vs0`. Differential Revision: https://reviews.llvm.org/D57857 llvm-svn: 360429
78 lines
2.7 KiB
LLVM
78 lines
2.7 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-unknown \
; RUN: -mcpu=pwr9 -ppc-asm-full-reg-names \
; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-unknown \
; RUN: -mcpu=pwr9 -ppc-asm-full-reg-names \
; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s

; The functions below are compiled for pwr9 with both the powerpc64le and the
; powerpc64 triple; both invocations are verified against the same set of
; assertions, so the expected instruction sequences must hold for each triple.

; test1: load a <2 x float> from %Ptr and fpext it to <2 x double>.
; The assertions expect one lfd, an xxmrghw of the loaded register with
; itself, and a single xvcvspdp producing the result in v2.
; Function Attrs: norecurse nounwind readonly
define dso_local <2 x double> @test1(<2 x float>* nocapture readonly %Ptr) {
; CHECK-LABEL: test1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lfd f0, 0(r3)
; CHECK-NEXT: xxmrghw vs0, vs0, vs0
; CHECK-NEXT: xvcvspdp v2, vs0
; CHECK-NEXT: blr
entry:
  %0 = load <2 x float>, <2 x float>* %Ptr, align 8
  %1 = fpext <2 x float> %0 to <2 x double>
  ret <2 x double> %1
}

; test2: fpext of a <2 x float> fsub result (%a - %b) to <2 x double>.
; The assertions expect both operands to be loaded with lfd, subtracted with
; one xvsubsp, and the difference widened via xxmrghw + xvcvspdp into v2.
; Function Attrs: norecurse nounwind readonly
define dso_local <2 x double> @test2(<2 x float>* nocapture readonly %a, <2 x float>* nocapture readonly %b) {
; CHECK-LABEL: test2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lfd f0, 0(r4)
; CHECK-NEXT: lfd f1, 0(r3)
; CHECK-NEXT: xvsubsp vs0, vs1, vs0
; CHECK-NEXT: xxmrghw vs0, vs0, vs0
; CHECK-NEXT: xvcvspdp v2, vs0
; CHECK-NEXT: blr
entry:
  %0 = load <2 x float>, <2 x float>* %a, align 8
  %1 = load <2 x float>, <2 x float>* %b, align 8
  %sub = fsub <2 x float> %0, %1
  %2 = fpext <2 x float> %sub to <2 x double>
  ret <2 x double> %2
}

; test3: fpext of a <2 x float> fadd result (%a + %b) to <2 x double>.
; The assertions expect both operands to be loaded with lfd, added with one
; xvaddsp, and the sum widened via xxmrghw + xvcvspdp into v2.
; Function Attrs: norecurse nounwind readonly
define dso_local <2 x double> @test3(<2 x float>* nocapture readonly %a, <2 x float>* nocapture readonly %b) {
; CHECK-LABEL: test3:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lfd f0, 0(r4)
; CHECK-NEXT: lfd f1, 0(r3)
; CHECK-NEXT: xvaddsp vs0, vs1, vs0
; CHECK-NEXT: xxmrghw vs0, vs0, vs0
; CHECK-NEXT: xvcvspdp v2, vs0
; CHECK-NEXT: blr
entry:
  %0 = load <2 x float>, <2 x float>* %a, align 8
  %1 = load <2 x float>, <2 x float>* %b, align 8
  ; Named %add because this value is produced by fadd.
  %add = fadd <2 x float> %0, %1
  %2 = fpext <2 x float> %add to <2 x double>
  ret <2 x double> %2
}

; test4: fpext of a <2 x float> fmul result (%a * %b) to <2 x double>.
; The assertions expect both operands to be loaded with lfd, multiplied with
; one xvmulsp, and the product widened via xxmrghw + xvcvspdp into v2.
; Function Attrs: norecurse nounwind readonly
define dso_local <2 x double> @test4(<2 x float>* nocapture readonly %a, <2 x float>* nocapture readonly %b) {
; CHECK-LABEL: test4:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lfd f0, 0(r4)
; CHECK-NEXT: lfd f1, 0(r3)
; CHECK-NEXT: xvmulsp vs0, vs1, vs0
; CHECK-NEXT: xxmrghw vs0, vs0, vs0
; CHECK-NEXT: xvcvspdp v2, vs0
; CHECK-NEXT: blr
entry:
  %0 = load <2 x float>, <2 x float>* %a, align 8
  %1 = load <2 x float>, <2 x float>* %b, align 8
  ; Named %mul because this value is produced by fmul.
  %mul = fmul <2 x float> %0, %1
  %2 = fpext <2 x float> %mul to <2 x double>
  ret <2 x double> %2
}