mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 18:54:02 +01:00
a22f68829a
This patch corresponds to review: https://reviews.llvm.org/D19825. The new lxvx/stxvx instructions do not require swaps to line the elements up correctly. In order to select them over the lxvd2x/lxvw4x instructions, which do require swaps, the patterns for the old instructions have a predicate that ensures they won't be selected on Power9 and newer CPUs. llvm-svn: 282143
37 lines
1.4 KiB
LLVM
37 lines
1.4 KiB
LLVM
; RUN: llc -verify-machineinstrs -O0 -mcpu=pwr8 \
|
|
; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s
|
|
|
|
; RUN: llc -verify-machineinstrs -O0 -mcpu=pwr9 \
|
|
; RUN: -mtriple=powerpc64le-unknown-unknown < %s \
|
|
; RUN: | FileCheck %s --check-prefix=CHECK-P9 --implicit-check-not xxswapd
|
|
|
|
; RUN: llc -verify-machineinstrs -O0 -mcpu=pwr9 -mattr=-power9-vector \
|
|
; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s
|
|
|
|
; Function Attrs: nounwind
|
|
; @test: stores the constant vector <1, 2, 3, 4> to %i, reloads a pointer to
; it and an i32 byte offset from stack slots, reads a <4 x i32> through
; @llvm.ppc.vsx.lxvw4x at pointer+offset, and stores the result to %j.
; The FileCheck directives placed after the call pin the instructions expected
; for that load under each check prefix.
; NOTE(review): the stack round-trips look like unoptimized front-end output,
; presumably kept so the offset reaches the load in a register -- confirm.
define void @test() {
|
|
entry:
|
|
; Stack slot for the i32 byte offset later used as the getelementptr index.
%__a.addr.i = alloca i32, align 4
|
|
; Stack slot for the pointer to the vector that will be read.
%__b.addr.i = alloca <4 x i32>*, align 8
|
|
; Source vector.
%i = alloca <4 x i32>, align 16
|
|
; Destination vector for the intrinsic's result.
%j = alloca <4 x i32>, align 16
|
|
; Initialize the source vector with four distinct elements.
store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32>* %i, align 16
|
|
; Offset is zero.
store i32 0, i32* %__a.addr.i, align 4
|
|
; Pointer slot refers to the source vector %i.
store <4 x i32>* %i, <4 x i32>** %__b.addr.i, align 8
|
|
; Reload the offset and the pointer from their stack slots.
%0 = load i32, i32* %__a.addr.i, align 4
|
|
%1 = load <4 x i32>*, <4 x i32>** %__b.addr.i, align 8
|
|
; Form the i8* address (pointer + offset bytes) passed to the intrinsic.
%2 = bitcast <4 x i32>* %1 to i8*
|
|
%3 = getelementptr i8, i8* %2, i32 %0
|
|
%4 = call <4 x i32> @llvm.ppc.vsx.lxvw4x(i8* %3)
|
|
; Default-prefix expectation: an lwa whose destination is captured as REG0,
; then an lxvd2x that uses REG0 as its last operand, then an xxswapd applied
; to the register the lxvd2x wrote.
; CHECK: lwa [[REG0:[0-9]+]],
|
|
; CHECK: lxvd2x [[REG1:[0-9]+]], {{[0-9]+}}, [[REG0]]
|
|
; CHECK: xxswapd [[REG1]], [[REG1]]
|
|
; P9-prefix expectation: the same lwa capture followed by an lxvx using REG0;
; no swap pattern is listed for this prefix.
; CHECK-P9: lwa [[REG0:[0-9]+]],
|
|
; CHECK-P9: lxvx [[REG1:[0-9]+]], {{[0-9]+}}, [[REG0]]
|
|
; Write the vector returned by the intrinsic to %j.
store <4 x i32> %4, <4 x i32>* %j, align 16
|
|
ret void
|
|
}
|
|
|
|
; Function Attrs: nounwind readonly
; NOTE(review): no attributes are attached to the declaration as written
; here, so the attribute list above is informational only -- confirm.
; Declaration of the PowerPC VSX lxvw4x intrinsic called by @test: takes an
; i8* address and returns a <4 x i32>.
|
|
declare <4 x i32> @llvm.ppc.vsx.lxvw4x(i8*)
|