1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-25 12:12:47 +01:00
llvm-mirror/test/CodeGen/PowerPC/pr25080.ll
Nemanja Ivanovic 9a29810062 [PowerPC] Canonicalize shuffles on big endian targets as well
Extend shuffle canonicalization and conversion of shuffles fed by vectorized
scalars to big endian subtargets. For big endian subtargets, loads and direct
moves of scalars into vector registers put the data in the correct element for
SCALAR_TO_VECTOR if the data type is 8 bytes wide. However, if the data type is
narrower, the value still ends up in the wrong place - although a different
wrong place than on little endian targets.

This patch extends the combine that keeps values where they are if they feed a
shuffle to big endian targets.

Differential revision: https://reviews.llvm.org/D100478
2021-04-20 07:29:47 -05:00

111 lines
3.4 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-- -mcpu=pwr8 < %s | FileCheck %s --check-prefix=LE
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-- -mcpu=pwr8 < %s | FileCheck %s --check-prefix=BE
; Codegen test for a vector compare on a non-power-of-two element type.
; The function truncates each i32 lane of %a to i23, tests the lane for zero,
; forces the first four lanes of the result to true, and sign-extends the
; <8 x i1> mask to <8 x i16>.
;
; The assertion lines below are matched against llc output for the two RUN
; configurations at the top of the file (powerpc64le and powerpc64, both with
; -mcpu=pwr8). They are autogenerated: regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
; NOTE(review): the name suggests this is the reproducer for bug PR25080 -
; confirm against the bug tracker before relying on that.
define <8 x i16> @pr25080(<8 x i32> %a) {
; LE-LABEL: pr25080:
; LE: # %bb.0: # %entry
; LE-NEXT: addis 3, 2, .LCPI0_0@toc@ha
; LE-NEXT: xxlxor 37, 37, 37
; LE-NEXT: addi 3, 3, .LCPI0_0@toc@l
; LE-NEXT: lvx 4, 0, 3
; LE-NEXT: xxland 34, 34, 36
; LE-NEXT: xxland 35, 35, 36
; LE-NEXT: vcmpequw 2, 2, 5
; LE-NEXT: vcmpequw 3, 3, 5
; LE-NEXT: xxswapd 0, 34
; LE-NEXT: mfvsrwz 3, 34
; LE-NEXT: xxsldwi 1, 34, 34, 1
; LE-NEXT: mfvsrwz 4, 35
; LE-NEXT: xxsldwi 2, 34, 34, 3
; LE-NEXT: mtvsrd 36, 3
; LE-NEXT: mffprwz 3, 0
; LE-NEXT: xxswapd 0, 35
; LE-NEXT: mtvsrd 37, 4
; LE-NEXT: mffprwz 4, 1
; LE-NEXT: xxsldwi 1, 35, 35, 1
; LE-NEXT: mtvsrd 34, 3
; LE-NEXT: mffprwz 3, 2
; LE-NEXT: mtvsrd 32, 4
; LE-NEXT: mffprwz 4, 0
; LE-NEXT: xxsldwi 0, 35, 35, 3
; LE-NEXT: mtvsrd 33, 3
; LE-NEXT: mffprwz 3, 1
; LE-NEXT: mtvsrd 38, 4
; LE-NEXT: mtvsrd 35, 3
; LE-NEXT: mffprwz 3, 0
; LE-NEXT: vmrghh 2, 0, 2
; LE-NEXT: mtvsrd 32, 3
; LE-NEXT: addis 3, 2, .LCPI0_1@toc@ha
; LE-NEXT: vmrghh 4, 1, 4
; LE-NEXT: addi 3, 3, .LCPI0_1@toc@l
; LE-NEXT: vmrghh 3, 3, 6
; LE-NEXT: vmrghh 5, 0, 5
; LE-NEXT: vmrglw 2, 4, 2
; LE-NEXT: vspltish 4, 15
; LE-NEXT: vmrglw 3, 5, 3
; LE-NEXT: xxmrgld 34, 35, 34
; LE-NEXT: lvx 3, 0, 3
; LE-NEXT: xxlor 34, 34, 35
; LE-NEXT: vslh 2, 2, 4
; LE-NEXT: vsrah 2, 2, 4
; LE-NEXT: blr
;
; BE-LABEL: pr25080:
; BE: # %bb.0: # %entry
; BE-NEXT: addis 3, 2, .LCPI0_0@toc@ha
; BE-NEXT: xxlxor 36, 36, 36
; BE-NEXT: addi 3, 3, .LCPI0_0@toc@l
; BE-NEXT: lxvw4x 0, 0, 3
; BE-NEXT: xxland 35, 35, 0
; BE-NEXT: xxland 34, 34, 0
; BE-NEXT: vcmpequw 3, 3, 4
; BE-NEXT: vcmpequw 2, 2, 4
; BE-NEXT: xxswapd 0, 35
; BE-NEXT: mfvsrwz 3, 35
; BE-NEXT: xxsldwi 1, 35, 35, 1
; BE-NEXT: mfvsrwz 4, 34
; BE-NEXT: mtvsrwz 36, 3
; BE-NEXT: xxsldwi 2, 35, 35, 3
; BE-NEXT: mffprwz 3, 0
; BE-NEXT: xxswapd 0, 34
; BE-NEXT: mtvsrwz 35, 4
; BE-NEXT: mffprwz 4, 1
; BE-NEXT: xxsldwi 1, 34, 34, 1
; BE-NEXT: mtvsrwz 37, 3
; BE-NEXT: addis 3, 2, .LCPI0_1@toc@ha
; BE-NEXT: addi 3, 3, .LCPI0_1@toc@l
; BE-NEXT: mtvsrwz 32, 4
; BE-NEXT: mffprwz 4, 0
; BE-NEXT: lxvw4x 33, 0, 3
; BE-NEXT: xxsldwi 0, 34, 34, 3
; BE-NEXT: mffprwz 3, 1
; BE-NEXT: mffprwz 5, 2
; BE-NEXT: vperm 2, 0, 5, 1
; BE-NEXT: mtvsrwz 37, 3
; BE-NEXT: mffprwz 3, 0
; BE-NEXT: mtvsrwz 38, 5
; BE-NEXT: mtvsrwz 39, 4
; BE-NEXT: mtvsrwz 32, 3
; BE-NEXT: addis 3, 2, .LCPI0_2@toc@ha
; BE-NEXT: vperm 4, 6, 4, 1
; BE-NEXT: addi 3, 3, .LCPI0_2@toc@l
; BE-NEXT: vperm 5, 5, 7, 1
; BE-NEXT: lxvw4x 0, 0, 3
; BE-NEXT: vperm 3, 0, 3, 1
; BE-NEXT: vmrghw 2, 4, 2
; BE-NEXT: vmrghw 3, 3, 5
; BE-NEXT: xxmrghd 34, 35, 34
; BE-NEXT: vspltish 3, 15
; BE-NEXT: xxlor 34, 34, 0
; BE-NEXT: vslh 2, 2, 3
; BE-NEXT: vsrah 2, 2, 3
; BE-NEXT: blr
entry:
; Keep only the low 23 bits of every lane; i23 is a deliberately odd width
; that has no native vector element type.
%0 = trunc <8 x i32> %a to <8 x i23>
; Per-lane test: true where the truncated value is zero.
%1 = icmp eq <8 x i23> %0, zeroinitializer
; Lanes 0-3 are forced to true by the constant; lanes 4-7 keep the compare
; result.
%2 = or <8 x i1> %1, <i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false>
; Widen the mask: a true lane becomes all-ones (-1), a false lane becomes 0.
%3 = sext <8 x i1> %2 to <8 x i16>
ret <8 x i16> %3
}