1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 03:02:36 +01:00
llvm-mirror/test/CodeGen/PowerPC/reduce_scalarization.ll
Lei Huang d48de5e046 [PowerPC] custom lower v2f64 fpext v2f32
Reduces scalarization overhead via custom lowering of v2f64 fpext v2f32.

E.g., for the following IR:
  %0 = load <2 x float>, <2 x float>* %Ptr, align 8
  %1 = fpext <2 x float> %0 to <2 x double>
  ret <2 x double> %1

Before custom lowering:
  ld r3, 0(r3)
  mtvsrd f0, r3
  xxswapd vs34, vs0
  xscvspdpn f0, vs0
  xxsldwi vs1, vs34, vs34, 3
  xscvspdpn f1, vs1
  xxmrghd vs34, vs0, vs1

After custom lowering:
  lfd f0, 0(r3)
  xxmrghw vs0, vs0, vs0
  xvcvspdp vs34, vs0

Differential Revision: https://reviews.llvm.org/D57857

llvm-svn: 360429
2019-05-10 14:04:06 +00:00

78 lines
2.7 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-unknown \
; RUN: -mcpu=pwr9 -ppc-asm-full-reg-names \
; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-unknown \
; RUN: -mcpu=pwr9 -ppc-asm-full-reg-names \
; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s
; Function Attrs: norecurse nounwind readonly
; test1: fpext applied directly to a <2 x float> loaded from %Ptr.
; The expected output is one lfd, one xxmrghw and one xvcvspdp, with no
; per-element scalar conversions.
define dso_local <2 x double> @test1(<2 x float>* nocapture readonly %Ptr) {
; CHECK-LABEL: test1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lfd f0, 0(r3)
; CHECK-NEXT: xxmrghw vs0, vs0, vs0
; CHECK-NEXT: xvcvspdp v2, vs0
; CHECK-NEXT: blr
entry:
  ; Narrow vector as stored in memory.
  %narrow = load <2 x float>, <2 x float>* %Ptr, align 8
  ; Widen both lanes to double precision.
  %wide = fpext <2 x float> %narrow to <2 x double>
  ret <2 x double> %wide
}
; Function Attrs: norecurse nounwind readonly
; test2: fpext applied to the result of a <2 x float> fsub (%a - %b).
; The expected output performs the subtraction with xvsubsp and then uses
; the same xxmrghw + xvcvspdp widening sequence as test1.
define dso_local <2 x double> @test2(<2 x float>* nocapture readonly %a, <2 x float>* nocapture readonly %b) {
; CHECK-LABEL: test2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lfd f0, 0(r4)
; CHECK-NEXT: lfd f1, 0(r3)
; CHECK-NEXT: xvsubsp vs0, vs1, vs0
; CHECK-NEXT: xxmrghw vs0, vs0, vs0
; CHECK-NEXT: xvcvspdp v2, vs0
; CHECK-NEXT: blr
entry:
  ; Operands are loaded in argument order: %a first, then %b.
  %lhs = load <2 x float>, <2 x float>* %a, align 8
  %rhs = load <2 x float>, <2 x float>* %b, align 8
  %diff = fsub <2 x float> %lhs, %rhs
  ; Widen the single-precision difference to double precision.
  %wide = fpext <2 x float> %diff to <2 x double>
  ret <2 x double> %wide
}
; Function Attrs: norecurse nounwind readonly
; test3: fpext applied to the result of a <2 x float> fadd (%a + %b).
; The expected output performs the addition with xvaddsp and then widens
; via xxmrghw + xvcvspdp.
define dso_local <2 x double> @test3(<2 x float>* nocapture readonly %a, <2 x float>* nocapture readonly %b) {
; CHECK-LABEL: test3:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lfd f0, 0(r4)
; CHECK-NEXT: lfd f1, 0(r3)
; CHECK-NEXT: xvaddsp vs0, vs1, vs0
; CHECK-NEXT: xxmrghw vs0, vs0, vs0
; CHECK-NEXT: xvcvspdp v2, vs0
; CHECK-NEXT: blr
entry:
  ; Operands are loaded in argument order: %a first, then %b.
  %lhs = load <2 x float>, <2 x float>* %a, align 8
  %rhs = load <2 x float>, <2 x float>* %b, align 8
  %sum = fadd <2 x float> %lhs, %rhs
  ; Widen the single-precision sum to double precision.
  %wide = fpext <2 x float> %sum to <2 x double>
  ret <2 x double> %wide
}
; Function Attrs: norecurse nounwind readonly
; test4: fpext applied to the result of a <2 x float> fmul (%a * %b).
; The expected output performs the multiplication with xvmulsp and then
; widens via xxmrghw + xvcvspdp.
define dso_local <2 x double> @test4(<2 x float>* nocapture readonly %a, <2 x float>* nocapture readonly %b) {
; CHECK-LABEL: test4:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lfd f0, 0(r4)
; CHECK-NEXT: lfd f1, 0(r3)
; CHECK-NEXT: xvmulsp vs0, vs1, vs0
; CHECK-NEXT: xxmrghw vs0, vs0, vs0
; CHECK-NEXT: xvcvspdp v2, vs0
; CHECK-NEXT: blr
entry:
  ; Operands are loaded in argument order: %a first, then %b.
  %lhs = load <2 x float>, <2 x float>* %a, align 8
  %rhs = load <2 x float>, <2 x float>* %b, align 8
  %prod = fmul <2 x float> %lhs, %rhs
  ; Widen the single-precision product to double precision.
  %wide = fpext <2 x float> %prod to <2 x double>
  ret <2 x double> %wide
}