mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 20:51:52 +01:00

[PowerPC] Run reg2mem on tests to simplify them.

Summary:
Also convert test/CodeGen/PowerPC/vsx-ldst-builtin-le.ll to use
FileCheck instead of two grep and count runs.
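
For reference, the FileCheck conversion in that test boils down to replacing the
redirect-and-grep RUN lines with a single FileCheck invocation (the full change is
in the second diff below; the Before/After comments are annotation, not part of
the patch):

  ; Before: pipe the llc output to a file and count mnemonics with grep
  ; RUN: llc -mcpu=pwr8 -mattr=+vsx -O2 -mtriple=powerpc64le-unknown-linux-gnu < %s > %t
  ; RUN: grep lxvd2x < %t | count 18
  ; RUN: grep stxvd2x < %t | count 18
  ; After: match each load/store in place with FileCheck
  ; RUN: llc -mcpu=pwr8 -mattr=+vsx -O2 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s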

This change is needed to avoid spurious diffs in these tests when
EarlyCSE is improved to use MemorySSA and can do more load elimination.
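
As a concrete illustration of the spurious diffs, the old tests pass every
intrinsic operand through a stack temporary; @test1 in the first diff below
starts with:

  %__a.addr.i = alloca <16 x i8>, align 16
  %__b.addr.i = alloca <16 x i8>, align 16
  %0 = load <16 x i8>, <16 x i8>* @vsc, align 16
  %1 = load <16 x i8>, <16 x i8>* @vsc, align 16
  store <16 x i8> %0, <16 x i8>* %__a.addr.i, align 16
  store <16 x i8> %1, <16 x i8>* %__b.addr.i, align 16
  %2 = load <16 x i8>, <16 x i8>* %__a.addr.i, align 16
  %3 = load <16 x i8>, <16 x i8>* %__b.addr.i, align 16

With MemorySSA, EarlyCSE can forward %0 to %2 even though the store to
%__b.addr.i sits in between (it does not clobber %__a.addr.i), so the IR
reaching llc, and therefore the assembly the CHECK lines match, would change
even though the test itself did not. Loading the operands directly from the
globals, as the updated tests do, keeps the checked output independent of how
much load elimination EarlyCSE performs.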

Reviewers: hfinkel

Subscribers: mcrosier, llvm-commits

Differential Revision: http://reviews.llvm.org/D20238

llvm-svn: 271553
Geoff Berry 2016-06-02 18:02:50 +00:00
parent 29c4e8764f
commit f48d5c7111
2 changed files with 73 additions and 181 deletions

@@ -4,7 +4,9 @@
 ; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s -check-prefix=CHECK-VSX
 @vsc = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5>, align 16
+@vsc2 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5>, align 16
 @vuc = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5>, align 16
+@vuc2 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5>, align 16
 @res_vll = common global <2 x i64> zeroinitializer, align 16
 @res_vull = common global <2 x i64> zeroinitializer, align 16
 @res_vsc = common global <16 x i8> zeroinitializer, align 16
@@ -13,54 +15,39 @@
 ; Function Attrs: nounwind
 define void @test1() {
 entry:
-  %__a.addr.i = alloca <16 x i8>, align 16
-  %__b.addr.i = alloca <16 x i8>, align 16
   %0 = load <16 x i8>, <16 x i8>* @vsc, align 16
-  %1 = load <16 x i8>, <16 x i8>* @vsc, align 16
-  store <16 x i8> %0, <16 x i8>* %__a.addr.i, align 16
-  store <16 x i8> %1, <16 x i8>* %__b.addr.i, align 16
-  %2 = load <16 x i8>, <16 x i8>* %__a.addr.i, align 16
-  %3 = load <16 x i8>, <16 x i8>* %__b.addr.i, align 16
-  %4 = call <2 x i64> @llvm.ppc.altivec.vbpermq(<16 x i8> %2, <16 x i8> %3)
-  store <2 x i64> %4, <2 x i64>* @res_vll, align 16
+  %1 = load <16 x i8>, <16 x i8>* @vsc2, align 16
+  %2 = call <2 x i64> @llvm.ppc.altivec.vbpermq(<16 x i8> %0, <16 x i8> %1)
+  store <2 x i64> %2, <2 x i64>* @res_vll, align 16
   ret void
 ; CHECK-LABEL: @test1
-; CHECK: lvx [[REG1:[0-9]+]],
-; CHECK: lvx [[REG2:[0-9]+]],
-; CHECK: vbpermq {{[0-9]+}}, [[REG2]], [[REG1]]
+; CHECK: lvx [[REG1:[0-9]+]], 0, 3
+; CHECK: lvx [[REG2:[0-9]+]], 0, 4
+; CHECK: vbpermq {{[0-9]+}}, [[REG1]], [[REG2]]
 ; CHECK-VSX: vbpermq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
 }
 ; Function Attrs: nounwind
 define void @test2() {
 entry:
-  %__a.addr.i = alloca <16 x i8>, align 16
-  %__b.addr.i = alloca <16 x i8>, align 16
   %0 = load <16 x i8>, <16 x i8>* @vuc, align 16
-  %1 = load <16 x i8>, <16 x i8>* @vuc, align 16
-  store <16 x i8> %0, <16 x i8>* %__a.addr.i, align 16
-  store <16 x i8> %1, <16 x i8>* %__b.addr.i, align 16
-  %2 = load <16 x i8>, <16 x i8>* %__a.addr.i, align 16
-  %3 = load <16 x i8>, <16 x i8>* %__b.addr.i, align 16
-  %4 = call <2 x i64> @llvm.ppc.altivec.vbpermq(<16 x i8> %2, <16 x i8> %3)
-  store <2 x i64> %4, <2 x i64>* @res_vull, align 16
+  %1 = load <16 x i8>, <16 x i8>* @vuc2, align 16
+  %2 = call <2 x i64> @llvm.ppc.altivec.vbpermq(<16 x i8> %0, <16 x i8> %1)
+  store <2 x i64> %2, <2 x i64>* @res_vull, align 16
   ret void
 ; CHECK-LABEL: @test2
-; CHECK: lvx [[REG1:[0-9]+]],
-; CHECK: lvx [[REG2:[0-9]+]],
-; CHECK: vbpermq {{[0-9]+}}, [[REG2]], [[REG1]]
+; CHECK: lvx [[REG1:[0-9]+]], 0, 3
+; CHECK: lvx [[REG2:[0-9]+]], 0, 4
+; CHECK: vbpermq {{[0-9]+}}, [[REG1]], [[REG2]]
 ; CHECK-VSX: vbpermq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
 }
 ; Function Attrs: nounwind
 define void @test3() {
 entry:
-  %__a.addr.i = alloca <16 x i8>, align 16
   %0 = load <16 x i8>, <16 x i8>* @vsc, align 16
-  store <16 x i8> %0, <16 x i8>* %__a.addr.i, align 16
-  %1 = load <16 x i8>, <16 x i8>* %__a.addr.i, align 16
-  %2 = call <16 x i8> @llvm.ppc.altivec.vgbbd(<16 x i8> %1)
-  store <16 x i8> %2, <16 x i8>* @res_vsc, align 16
+  %1 = call <16 x i8> @llvm.ppc.altivec.vgbbd(<16 x i8> %0)
+  store <16 x i8> %1, <16 x i8>* @res_vsc, align 16
   ret void
 ; CHECK-LABEL: @test3
 ; CHECK: lvx [[REG1:[0-9]+]],
@@ -71,12 +58,9 @@ entry:
 ; Function Attrs: nounwind
 define void @test4() {
 entry:
-  %__a.addr.i = alloca <16 x i8>, align 16
   %0 = load <16 x i8>, <16 x i8>* @vuc, align 16
-  store <16 x i8> %0, <16 x i8>* %__a.addr.i, align 16
-  %1 = load <16 x i8>, <16 x i8>* %__a.addr.i, align 16
-  %2 = call <16 x i8> @llvm.ppc.altivec.vgbbd(<16 x i8> %1)
-  store <16 x i8> %2, <16 x i8>* @res_vuc, align 16
+  %1 = call <16 x i8> @llvm.ppc.altivec.vgbbd(<16 x i8> %0)
+  store <16 x i8> %1, <16 x i8>* @res_vuc, align 16
   ret void
 ; CHECK-LABEL: @test4
 ; CHECK: lvx [[REG1:[0-9]+]],

test/CodeGen/PowerPC/vsx-ldst-builtin-le.ll

@@ -1,6 +1,4 @@
-; RUN: llc -mcpu=pwr8 -mattr=+vsx -O2 -mtriple=powerpc64le-unknown-linux-gnu < %s > %t
-; RUN: grep lxvd2x < %t | count 18
-; RUN: grep stxvd2x < %t | count 18
+; RUN: llc -mcpu=pwr8 -mattr=+vsx -O2 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
 @vf = global <4 x float> <float -1.500000e+00, float 2.500000e+00, float -3.500000e+00, float 4.500000e+00>, align 16
 @vd = global <2 x double> <double 3.500000e+00, double -7.500000e+00>, align 16
@@ -18,150 +16,60 @@
 define void @test1() {
 entry:
+; CHECK-LABEL: test1
-  %__a.addr.i31 = alloca i32, align 4
-  %__b.addr.i32 = alloca <4 x i32>*, align 8
-  %__a.addr.i29 = alloca i32, align 4
-  %__b.addr.i30 = alloca <4 x float>*, align 8
-  %__a.addr.i27 = alloca i32, align 4
-  %__b.addr.i28 = alloca <2 x i64>*, align 8
-  %__a.addr.i25 = alloca i32, align 4
-  %__b.addr.i26 = alloca <2 x i64>*, align 8
-  %__a.addr.i23 = alloca i32, align 4
-  %__b.addr.i24 = alloca <2 x double>*, align 8
-  %__a.addr.i20 = alloca <4 x i32>, align 16
-  %__b.addr.i21 = alloca i32, align 4
-  %__c.addr.i22 = alloca <4 x i32>*, align 8
-  %__a.addr.i17 = alloca <4 x i32>, align 16
-  %__b.addr.i18 = alloca i32, align 4
-  %__c.addr.i19 = alloca <4 x i32>*, align 8
-  %__a.addr.i14 = alloca <4 x float>, align 16
-  %__b.addr.i15 = alloca i32, align 4
-  %__c.addr.i16 = alloca <4 x float>*, align 8
-  %__a.addr.i11 = alloca <2 x i64>, align 16
-  %__b.addr.i12 = alloca i32, align 4
-  %__c.addr.i13 = alloca <2 x i64>*, align 8
-  %__a.addr.i8 = alloca <2 x i64>, align 16
-  %__b.addr.i9 = alloca i32, align 4
-  %__c.addr.i10 = alloca <2 x i64>*, align 8
-  %__a.addr.i6 = alloca <2 x double>, align 16
-  %__b.addr.i7 = alloca i32, align 4
-  %__c.addr.i = alloca <2 x double>*, align 8
-  %__a.addr.i = alloca i32, align 4
-  %__b.addr.i = alloca <4 x i32>*, align 8
-  store i32 0, i32* %__a.addr.i, align 4
-  store <4 x i32>* @vsi, <4 x i32>** %__b.addr.i, align 8
-  %0 = load i32, i32* %__a.addr.i, align 4
-  %1 = load <4 x i32>*, <4 x i32>** %__b.addr.i, align 8
-  %2 = bitcast <4 x i32>* %1 to i8*
-  %3 = getelementptr i8, i8* %2, i32 %0
-  %4 = call <4 x i32> @llvm.ppc.vsx.lxvw4x(i8* %3)
-  store <4 x i32> %4, <4 x i32>* @res_vsi, align 16
-  store i32 0, i32* %__a.addr.i31, align 4
-  store <4 x i32>* @vui, <4 x i32>** %__b.addr.i32, align 8
-  %5 = load i32, i32* %__a.addr.i31, align 4
-  %6 = load <4 x i32>*, <4 x i32>** %__b.addr.i32, align 8
-  %7 = bitcast <4 x i32>* %6 to i8*
-  %8 = getelementptr i8, i8* %7, i32 %5
-  %9 = call <4 x i32> @llvm.ppc.vsx.lxvw4x(i8* %8)
-  store <4 x i32> %9, <4 x i32>* @res_vui, align 16
-  store i32 0, i32* %__a.addr.i29, align 4
-  store <4 x float>* @vf, <4 x float>** %__b.addr.i30, align 8
-  %10 = load i32, i32* %__a.addr.i29, align 4
-  %11 = load <4 x float>*, <4 x float>** %__b.addr.i30, align 8
-  %12 = bitcast <4 x float>* %11 to i8*
-  %13 = getelementptr i8, i8* %12, i32 %10
-  %14 = call <4 x i32> @llvm.ppc.vsx.lxvw4x(i8* %13)
-  %15 = bitcast <4 x i32> %14 to <4 x float>
-  store <4 x float> %15, <4 x float>* @res_vf, align 16
-  store i32 0, i32* %__a.addr.i27, align 4
-  store <2 x i64>* @vsll, <2 x i64>** %__b.addr.i28, align 8
-  %16 = load i32, i32* %__a.addr.i27, align 4
-  %17 = load <2 x i64>*, <2 x i64>** %__b.addr.i28, align 8
-  %18 = bitcast <2 x i64>* %17 to i8*
-  %19 = getelementptr i8, i8* %18, i32 %16
-  %20 = call <2 x double> @llvm.ppc.vsx.lxvd2x(i8* %19)
-  %21 = bitcast <2 x double> %20 to <2 x i64>
-  store <2 x i64> %21, <2 x i64>* @res_vsll, align 16
-  store i32 0, i32* %__a.addr.i25, align 4
-  store <2 x i64>* @vull, <2 x i64>** %__b.addr.i26, align 8
-  %22 = load i32, i32* %__a.addr.i25, align 4
-  %23 = load <2 x i64>*, <2 x i64>** %__b.addr.i26, align 8
-  %24 = bitcast <2 x i64>* %23 to i8*
-  %25 = getelementptr i8, i8* %24, i32 %22
-  %26 = call <2 x double> @llvm.ppc.vsx.lxvd2x(i8* %25)
-  %27 = bitcast <2 x double> %26 to <2 x i64>
-  store <2 x i64> %27, <2 x i64>* @res_vull, align 16
-  store i32 0, i32* %__a.addr.i23, align 4
-  store <2 x double>* @vd, <2 x double>** %__b.addr.i24, align 8
-  %28 = load i32, i32* %__a.addr.i23, align 4
-  %29 = load <2 x double>*, <2 x double>** %__b.addr.i24, align 8
-  %30 = bitcast <2 x double>* %29 to i8*
-  %31 = getelementptr i8, i8* %30, i32 %28
-  %32 = call <2 x double> @llvm.ppc.vsx.lxvd2x(i8* %31)
-  store <2 x double> %32, <2 x double>* @res_vd, align 16
-  %33 = load <4 x i32>, <4 x i32>* @vsi, align 16
-  store <4 x i32> %33, <4 x i32>* %__a.addr.i20, align 16
-  store i32 0, i32* %__b.addr.i21, align 4
-  store <4 x i32>* @res_vsi, <4 x i32>** %__c.addr.i22, align 8
-  %34 = load <4 x i32>, <4 x i32>* %__a.addr.i20, align 16
-  %35 = load i32, i32* %__b.addr.i21, align 4
-  %36 = load <4 x i32>*, <4 x i32>** %__c.addr.i22, align 8
-  %37 = bitcast <4 x i32>* %36 to i8*
-  %38 = getelementptr i8, i8* %37, i32 %35
-  call void @llvm.ppc.vsx.stxvw4x(<4 x i32> %34, i8* %38)
-  %39 = load <4 x i32>, <4 x i32>* @vui, align 16
-  store <4 x i32> %39, <4 x i32>* %__a.addr.i17, align 16
-  store i32 0, i32* %__b.addr.i18, align 4
-  store <4 x i32>* @res_vui, <4 x i32>** %__c.addr.i19, align 8
-  %40 = load <4 x i32>, <4 x i32>* %__a.addr.i17, align 16
-  %41 = load i32, i32* %__b.addr.i18, align 4
-  %42 = load <4 x i32>*, <4 x i32>** %__c.addr.i19, align 8
-  %43 = bitcast <4 x i32>* %42 to i8*
-  %44 = getelementptr i8, i8* %43, i32 %41
-  call void @llvm.ppc.vsx.stxvw4x(<4 x i32> %40, i8* %44)
-  %45 = load <4 x float>, <4 x float>* @vf, align 16
-  store <4 x float> %45, <4 x float>* %__a.addr.i14, align 16
-  store i32 0, i32* %__b.addr.i15, align 4
-  store <4 x float>* @res_vf, <4 x float>** %__c.addr.i16, align 8
-  %46 = load <4 x float>, <4 x float>* %__a.addr.i14, align 16
-  %47 = bitcast <4 x float> %46 to <4 x i32>
-  %48 = load i32, i32* %__b.addr.i15, align 4
-  %49 = load <4 x float>*, <4 x float>** %__c.addr.i16, align 8
-  %50 = bitcast <4 x float>* %49 to i8*
-  %51 = getelementptr i8, i8* %50, i32 %48
-  call void @llvm.ppc.vsx.stxvw4x(<4 x i32> %47, i8* %51) #1
-  %52 = load <2 x i64>, <2 x i64>* @vsll, align 16
-  store <2 x i64> %52, <2 x i64>* %__a.addr.i11, align 16
-  store i32 0, i32* %__b.addr.i12, align 4
-  store <2 x i64>* @res_vsll, <2 x i64>** %__c.addr.i13, align 8
-  %53 = load <2 x i64>, <2 x i64>* %__a.addr.i11, align 16
-  %54 = bitcast <2 x i64> %53 to <2 x double>
-  %55 = load i32, i32* %__b.addr.i12, align 4
-  %56 = load <2 x i64>*, <2 x i64>** %__c.addr.i13, align 8
-  %57 = bitcast <2 x i64>* %56 to i8*
-  %58 = getelementptr i8, i8* %57, i32 %55
-  call void @llvm.ppc.vsx.stxvd2x(<2 x double> %54, i8* %58)
-  %59 = load <2 x i64>, <2 x i64>* @vull, align 16
-  store <2 x i64> %59, <2 x i64>* %__a.addr.i8, align 16
-  store i32 0, i32* %__b.addr.i9, align 4
-  store <2 x i64>* @res_vull, <2 x i64>** %__c.addr.i10, align 8
-  %60 = load <2 x i64>, <2 x i64>* %__a.addr.i8, align 16
-  %61 = bitcast <2 x i64> %60 to <2 x double>
-  %62 = load i32, i32* %__b.addr.i9, align 4
-  %63 = load <2 x i64>*, <2 x i64>** %__c.addr.i10, align 8
-  %64 = bitcast <2 x i64>* %63 to i8*
-  %65 = getelementptr i8, i8* %64, i32 %62
-  call void @llvm.ppc.vsx.stxvd2x(<2 x double> %61, i8* %65)
-  %66 = load <2 x double>, <2 x double>* @vd, align 16
-  store <2 x double> %66, <2 x double>* %__a.addr.i6, align 16
-  store i32 0, i32* %__b.addr.i7, align 4
-  store <2 x double>* @res_vd, <2 x double>** %__c.addr.i, align 8
-  %67 = load <2 x double>, <2 x double>* %__a.addr.i6, align 16
-  %68 = load i32, i32* %__b.addr.i7, align 4
-  %69 = load <2 x double>*, <2 x double>** %__c.addr.i, align 8
-  %70 = bitcast <2 x double>* %69 to i8*
-  %71 = getelementptr i8, i8* %70, i32 %68
-  call void @llvm.ppc.vsx.stxvd2x(<2 x double> %67, i8* %71)
+; CHECK: lxvd2x
+  %0 = call <4 x i32> @llvm.ppc.vsx.lxvw4x(i8* bitcast (<4 x i32>* @vsi to i8*))
+; CHECK: stxvd2x
+  store <4 x i32> %0, <4 x i32>* @res_vsi, align 16
+; CHECK: lxvd2x
+  %1 = call <4 x i32> @llvm.ppc.vsx.lxvw4x(i8* bitcast (<4 x i32>* @vui to i8*))
+; CHECK: stxvd2x
+  store <4 x i32> %1, <4 x i32>* @res_vui, align 16
+; CHECK: lxvd2x
+  %2 = call <4 x i32> @llvm.ppc.vsx.lxvw4x(i8* bitcast (<4 x float>* @vf to i8*))
+  %3 = bitcast <4 x i32> %2 to <4 x float>
+; CHECK: stxvd2x
+  store <4 x float> %3, <4 x float>* @res_vf, align 16
+; CHECK: lxvd2x
+  %4 = call <2 x double> @llvm.ppc.vsx.lxvd2x(i8* bitcast (<2 x i64>* @vsll to i8*))
+  %5 = bitcast <2 x double> %4 to <2 x i64>
+; CHECK: stxvd2x
+  store <2 x i64> %5, <2 x i64>* @res_vsll, align 16
+; CHECK: lxvd2x
+  %6 = call <2 x double> @llvm.ppc.vsx.lxvd2x(i8* bitcast (<2 x i64>* @vull to i8*))
+  %7 = bitcast <2 x double> %6 to <2 x i64>
+; CHECK: stxvd2x
+  store <2 x i64> %7, <2 x i64>* @res_vull, align 16
+; CHECK: lxvd2x
+  %8 = call <2 x double> @llvm.ppc.vsx.lxvd2x(i8* bitcast (<2 x double>* @vd to i8*))
+; CHECK: stxvd2x
+  store <2 x double> %8, <2 x double>* @res_vd, align 16
+; CHECK: lxvd2x
+  %9 = load <4 x i32>, <4 x i32>* @vsi, align 16
+; CHECK: stxvd2x
+  call void @llvm.ppc.vsx.stxvw4x(<4 x i32> %9, i8* bitcast (<4 x i32>* @res_vsi to i8*))
+; CHECK: lxvd2x
+  %10 = load <4 x i32>, <4 x i32>* @vui, align 16
+; CHECK: stxvd2x
+  call void @llvm.ppc.vsx.stxvw4x(<4 x i32> %10, i8* bitcast (<4 x i32>* @res_vui to i8*))
+; CHECK: lxvd2x
+  %11 = load <4 x float>, <4 x float>* @vf, align 16
+  %12 = bitcast <4 x float> %11 to <4 x i32>
+; CHECK: stxvd2x
+  call void @llvm.ppc.vsx.stxvw4x(<4 x i32> %12, i8* bitcast (<4 x float>* @res_vf to i8*))
+; CHECK: lxvd2x
+  %13 = load <2 x i64>, <2 x i64>* @vsll, align 16
+  %14 = bitcast <2 x i64> %13 to <2 x double>
+; CHECK: stxvd2x
+  call void @llvm.ppc.vsx.stxvd2x(<2 x double> %14, i8* bitcast (<2 x i64>* @res_vsll to i8*))
+; CHECK: lxvd2x
+  %15 = load <2 x i64>, <2 x i64>* @vull, align 16
+  %16 = bitcast <2 x i64> %15 to <2 x double>
+; CHECK: stxvd2x
+  call void @llvm.ppc.vsx.stxvd2x(<2 x double> %16, i8* bitcast (<2 x i64>* @res_vull to i8*))
+; CHECK: lxvd2x
+  %17 = load <2 x double>, <2 x double>* @vd, align 16
+; CHECK: stxvd2x
+  call void @llvm.ppc.vsx.stxvd2x(<2 x double> %17, i8* bitcast (<2 x double>* @res_vd to i8*))
   ret void
 }