1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-25 04:02:41 +01:00
llvm-mirror/test/CodeGen/X86/vector-sqrt.ll
Craig Topper abb03b6ae8 [X86] Merge the FR128 and VR128 regclass since they have identical spill and alignment characteristics.
This unfortunately requires a bunch of bitcasts to be added to SUBREG_TO_REG, COPY_TO_REGCLASS, and instructions in output patterns. Otherwise tablegen seems to default to picking f128, and then we fail when something tries to get the register class for f128, which isn't always valid.

The test changes are because we were previously mixing fr128 and vr128 due to constrainRegClass finding FR128 first, and passes like live range shrinking weren't handling that well.

llvm-svn: 337147
2018-07-16 06:56:09 +00:00

69 lines
3.1 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s
; sqrtd2: loads two consecutive doubles through %v, calls @sqrt on each, and
; packs the two results into lanes 0 and 1 of a <2 x double>.
; The assertions below expect both calls to be emitted inline as vsqrtsd (no
; call instruction appears in the expected output) and the two scalar results
; to be merged with a single vunpcklpd.
; Function Attrs: nounwind readonly uwtable
define <2 x double> @sqrtd2(double* nocapture readonly %v) local_unnamed_addr #0 {
; CHECK-LABEL: sqrtd2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: retq
entry:
; Lane 0 source: the double at %v itself.
%0 = load double, double* %v, align 8
%call = tail call double @sqrt(double %0) #2
; Lane 1 source: the double one element past %v.
%arrayidx1 = getelementptr inbounds double, double* %v, i64 1
%1 = load double, double* %arrayidx1, align 8
%call2 = tail call double @sqrt(double %1) #2
; Build the result vector from undef, one scalar insert per lane.
%vecinit.i = insertelement <2 x double> undef, double %call, i32 0
%vecinit1.i = insertelement <2 x double> %vecinit.i, double %call2, i32 1
ret <2 x double> %vecinit1.i
}
; External declaration only; this file provides no body for @sqrt.
; NOTE(review): presumably the C math library's double-precision sqrt; the
; expected output for @sqrtd2 shows calls to it emitted as vsqrtsd.
; Function Attrs: nounwind readnone
declare double @sqrt(double) local_unnamed_addr #1
; sqrtf4: loads four consecutive floats through %v, calls @sqrtf on each, and
; packs the four results into lanes 0..3 of a <4 x float>.
; The assertions below expect every call to be emitted inline as vsqrtss (no
; call instruction appears in the expected output) and the result vector to be
; assembled with three vinsertps instructions, one per lane after lane 0.
; Function Attrs: nounwind readonly uwtable
define <4 x float> @sqrtf4(float* nocapture readonly %v) local_unnamed_addr #0 {
; CHECK-LABEL: sqrtf4:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT: vsqrtss %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vsqrtss %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT: vsqrtss %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT: vsqrtss %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
; CHECK-NEXT: retq
entry:
; Lane 0 source: the float at %v itself.
%0 = load float, float* %v, align 4
%call = tail call float @sqrtf(float %0) #2
; Lane 1 source: element index 1 from %v.
%arrayidx1 = getelementptr inbounds float, float* %v, i64 1
%1 = load float, float* %arrayidx1, align 4
%call2 = tail call float @sqrtf(float %1) #2
; Lane 2 source: element index 2 from %v.
%arrayidx3 = getelementptr inbounds float, float* %v, i64 2
%2 = load float, float* %arrayidx3, align 4
%call4 = tail call float @sqrtf(float %2) #2
; Lane 3 source: element index 3 from %v.
%arrayidx5 = getelementptr inbounds float, float* %v, i64 3
%3 = load float, float* %arrayidx5, align 4
%call6 = tail call float @sqrtf(float %3) #2
; Build the result vector from undef, one scalar insert per lane.
%vecinit.i = insertelement <4 x float> undef, float %call, i32 0
%vecinit1.i = insertelement <4 x float> %vecinit.i, float %call2, i32 1
%vecinit2.i = insertelement <4 x float> %vecinit1.i, float %call4, i32 2
%vecinit3.i = insertelement <4 x float> %vecinit2.i, float %call6, i32 3
ret <4 x float> %vecinit3.i
}
; External declaration only; this file provides no body for @sqrtf.
; NOTE(review): presumably the C math library's single-precision sqrtf; the
; expected output for @sqrtf4 shows calls to it emitted as vsqrtss.
; Function Attrs: nounwind readnone
declare float @sqrtf(float) local_unnamed_addr #1
; Attribute groups used in this file.
; #0 is attached to the two test functions, @sqrtd2 and @sqrtf4.
attributes #0 = { nounwind readonly uwtable "target-features"="+avx" "unsafe-fp-math"="false" "use-soft-float"="false" }
; #1 is attached to the @sqrt and @sqrtf declarations. Its target features are
; kept identical to #0 and to the -mattr=+avx llc invocation at the top of the
; file, so the whole test describes one feature level (AVX, not AVX2).
attributes #1 = { nounwind readnone "target-features"="+avx" "unsafe-fp-math"="false" "use-soft-float"="false" }
; #2 is attached to each call site of @sqrt / @sqrtf.
attributes #2 = { nounwind readnone }