mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-25 05:52:53 +02:00
f7fafe4ce2
This patch corresponds to review: http://reviews.llvm.org/D20443 It changes the legalization strategy for illegal vector types from integer promotion to widening. This only applies for vectors with elements of width that is a multiple of a byte since we have hardware support for vectors with 1, 2, 3, 8 and 16 byte elements. Integer promotion for vectors is quite expensive on PPC due to the sequence of breaking apart the vector, extending the elements and reconstituting the vector. Two of these operations are expensive. This patch causes between minor and major improvements in performance on most benchmarks. There are very few benchmarks whose performance regresses. These regressions can be handled in a subsequent patch with a DAG combine (similar to how this patch handles int -> fp conversions of illegal vector types). llvm-svn: 274535
46 lines
1.4 KiB
LLVM
46 lines
1.4 KiB
LLVM
; RUN: opt < %s -cost-model -analyze -mtriple=powerpc64-unknown-linux-gnu -mcpu=g5 | FileCheck %s
|
|
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
|
|
target triple = "powerpc64-unknown-linux-gnu"
|
|
|
|
define i32 @stores(i32 %arg) {
|
|
|
|
; CHECK: cost of 1 {{.*}} store
|
|
store i8 undef, i8* undef, align 4
|
|
; CHECK: cost of 1 {{.*}} store
|
|
store i16 undef, i16* undef, align 4
|
|
; CHECK: cost of 1 {{.*}} store
|
|
store i32 undef, i32* undef, align 4
|
|
; CHECK: cost of 2 {{.*}} store
|
|
store i64 undef, i64* undef, align 4
|
|
; CHECK: cost of 4 {{.*}} store
|
|
store i128 undef, i128* undef, align 4
|
|
|
|
ret i32 undef
|
|
}
|
|
define i32 @loads(i32 %arg) {
|
|
; CHECK: cost of 1 {{.*}} load
|
|
load i8, i8* undef, align 4
|
|
; CHECK: cost of 1 {{.*}} load
|
|
load i16, i16* undef, align 4
|
|
; CHECK: cost of 1 {{.*}} load
|
|
load i32, i32* undef, align 4
|
|
; CHECK: cost of 2 {{.*}} load
|
|
load i64, i64* undef, align 4
|
|
; CHECK: cost of 4 {{.*}} load
|
|
load i128, i128* undef, align 4
|
|
|
|
; FIXME: There actually are sub-vector Altivec loads, and so we could handle
|
|
; this with a small expense, but we don't currently.
|
|
; CHECK: cost of 42 {{.*}} load
|
|
load <4 x i16>, <4 x i16>* undef, align 2
|
|
|
|
; CHECK: cost of 2 {{.*}} load
|
|
load <4 x i32>, <4 x i32>* undef, align 4
|
|
|
|
; CHECK: cost of 46 {{.*}} load
|
|
load <3 x float>, <3 x float>* undef, align 1
|
|
|
|
ret i32 undef
|
|
}
|
|
|