mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 10:42:39 +01:00
9a29810062
Extend shuffle canonicalization and conversion of shuffles fed by vectorized scalars to big endian subtargets. For big endian subtargets, loads and direct moves of scalars into vector registers put the data in the correct element for SCALAR_TO_VECTOR if the data type is 8 bytes wide. However, if the data type is narrower, the value still ends up in the wrong place - although a different wrong place than on little endian targets. This patch extends the combine that keeps values where they are if they feed a shuffle to big endian targets. Differential revision: https://reviews.llvm.org/D100478
111 lines
3.4 KiB
LLVM
111 lines
3.4 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-- -mcpu=pwr8 < %s | FileCheck %s --check-prefix=LE
|
|
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-- -mcpu=pwr8 < %s | FileCheck %s --check-prefix=BE
|
|
|
|
; @pr25080 takes <8 x i32> %a, truncates every lane to i23, tests each lane for
; equality with zero, ORs the resulting <8 x i1> with a constant whose first
; four lanes are true, and returns the sign-extension of that to <8 x i16>.
;
; The comment lines below that carry the two check prefixes hold the expected
; llc output for the little-endian and the big-endian invocation declared at
; the top of the file. The file header notes that these assertions were
; autogenerated by utils/update_llc_test_checks.py, so they should presumably
; be regenerated with that script rather than edited by hand.
define <8 x i16> @pr25080(<8 x i32> %a) {
|
|
; Expected output for the powerpc64le (little-endian), pwr8 invocation.
; LE-LABEL: pr25080:
|
|
; LE: # %bb.0: # %entry
|
|
; LE-NEXT: addis 3, 2, .LCPI0_0@toc@ha
|
|
; LE-NEXT: xxlxor 37, 37, 37
|
|
; LE-NEXT: addi 3, 3, .LCPI0_0@toc@l
|
|
; LE-NEXT: lvx 4, 0, 3
|
|
; LE-NEXT: xxland 34, 34, 36
|
|
; LE-NEXT: xxland 35, 35, 36
|
|
; LE-NEXT: vcmpequw 2, 2, 5
|
|
; LE-NEXT: vcmpequw 3, 3, 5
|
|
; LE-NEXT: xxswapd 0, 34
|
|
; LE-NEXT: mfvsrwz 3, 34
|
|
; LE-NEXT: xxsldwi 1, 34, 34, 1
|
|
; LE-NEXT: mfvsrwz 4, 35
|
|
; LE-NEXT: xxsldwi 2, 34, 34, 3
|
|
; LE-NEXT: mtvsrd 36, 3
|
|
; LE-NEXT: mffprwz 3, 0
|
|
; LE-NEXT: xxswapd 0, 35
|
|
; LE-NEXT: mtvsrd 37, 4
|
|
; LE-NEXT: mffprwz 4, 1
|
|
; LE-NEXT: xxsldwi 1, 35, 35, 1
|
|
; LE-NEXT: mtvsrd 34, 3
|
|
; LE-NEXT: mffprwz 3, 2
|
|
; LE-NEXT: mtvsrd 32, 4
|
|
; LE-NEXT: mffprwz 4, 0
|
|
; LE-NEXT: xxsldwi 0, 35, 35, 3
|
|
; LE-NEXT: mtvsrd 33, 3
|
|
; LE-NEXT: mffprwz 3, 1
|
|
; LE-NEXT: mtvsrd 38, 4
|
|
; LE-NEXT: mtvsrd 35, 3
|
|
; LE-NEXT: mffprwz 3, 0
|
|
; LE-NEXT: vmrghh 2, 0, 2
|
|
; LE-NEXT: mtvsrd 32, 3
|
|
; LE-NEXT: addis 3, 2, .LCPI0_1@toc@ha
|
|
; LE-NEXT: vmrghh 4, 1, 4
|
|
; LE-NEXT: addi 3, 3, .LCPI0_1@toc@l
|
|
; LE-NEXT: vmrghh 3, 3, 6
|
|
; LE-NEXT: vmrghh 5, 0, 5
|
|
; LE-NEXT: vmrglw 2, 4, 2
|
|
; LE-NEXT: vspltish 4, 15
|
|
; LE-NEXT: vmrglw 3, 5, 3
|
|
; LE-NEXT: xxmrgld 34, 35, 34
|
|
; LE-NEXT: lvx 3, 0, 3
|
|
; LE-NEXT: xxlor 34, 34, 35
|
|
; LE-NEXT: vslh 2, 2, 4
|
|
; LE-NEXT: vsrah 2, 2, 4
|
|
; LE-NEXT: blr
|
|
;
|
|
; Expected output for the powerpc64 (big-endian), pwr8 invocation.
; BE-LABEL: pr25080:
|
|
; BE: # %bb.0: # %entry
|
|
; BE-NEXT: addis 3, 2, .LCPI0_0@toc@ha
|
|
; BE-NEXT: xxlxor 36, 36, 36
|
|
; BE-NEXT: addi 3, 3, .LCPI0_0@toc@l
|
|
; BE-NEXT: lxvw4x 0, 0, 3
|
|
; BE-NEXT: xxland 35, 35, 0
|
|
; BE-NEXT: xxland 34, 34, 0
|
|
; BE-NEXT: vcmpequw 3, 3, 4
|
|
; BE-NEXT: vcmpequw 2, 2, 4
|
|
; BE-NEXT: xxswapd 0, 35
|
|
; BE-NEXT: mfvsrwz 3, 35
|
|
; BE-NEXT: xxsldwi 1, 35, 35, 1
|
|
; BE-NEXT: mfvsrwz 4, 34
|
|
; BE-NEXT: mtvsrwz 36, 3
|
|
; BE-NEXT: xxsldwi 2, 35, 35, 3
|
|
; BE-NEXT: mffprwz 3, 0
|
|
; BE-NEXT: xxswapd 0, 34
|
|
; BE-NEXT: mtvsrwz 35, 4
|
|
; BE-NEXT: mffprwz 4, 1
|
|
; BE-NEXT: xxsldwi 1, 34, 34, 1
|
|
; BE-NEXT: mtvsrwz 37, 3
|
|
; BE-NEXT: addis 3, 2, .LCPI0_1@toc@ha
|
|
; BE-NEXT: addi 3, 3, .LCPI0_1@toc@l
|
|
; BE-NEXT: mtvsrwz 32, 4
|
|
; BE-NEXT: mffprwz 4, 0
|
|
; BE-NEXT: lxvw4x 33, 0, 3
|
|
; BE-NEXT: xxsldwi 0, 34, 34, 3
|
|
; BE-NEXT: mffprwz 3, 1
|
|
; BE-NEXT: mffprwz 5, 2
|
|
; BE-NEXT: vperm 2, 0, 5, 1
|
|
; BE-NEXT: mtvsrwz 37, 3
|
|
; BE-NEXT: mffprwz 3, 0
|
|
; BE-NEXT: mtvsrwz 38, 5
|
|
; BE-NEXT: mtvsrwz 39, 4
|
|
; BE-NEXT: mtvsrwz 32, 3
|
|
; BE-NEXT: addis 3, 2, .LCPI0_2@toc@ha
|
|
; BE-NEXT: vperm 4, 6, 4, 1
|
|
; BE-NEXT: addi 3, 3, .LCPI0_2@toc@l
|
|
; BE-NEXT: vperm 5, 5, 7, 1
|
|
; BE-NEXT: lxvw4x 0, 0, 3
|
|
; BE-NEXT: vperm 3, 0, 3, 1
|
|
; BE-NEXT: vmrghw 2, 4, 2
|
|
; BE-NEXT: vmrghw 3, 3, 5
|
|
; BE-NEXT: xxmrghd 34, 35, 34
|
|
; BE-NEXT: vspltish 3, 15
|
|
; BE-NEXT: xxlor 34, 34, 0
|
|
; BE-NEXT: vslh 2, 2, 3
|
|
; BE-NEXT: vsrah 2, 2, 3
|
|
; BE-NEXT: blr
|
|
entry:
|
|
; Narrow every lane from i32 to i23 (a non-power-of-two integer width).
%0 = trunc <8 x i32> %a to <8 x i23>
|
|
; Lane-wise test against zero; yields one i1 per lane.
%1 = icmp eq <8 x i23> %0, zeroinitializer
|
|
; OR with a constant mask: lanes 0-3 become true unconditionally, while
; lanes 4-7 keep the result of the compare.
%2 = or <8 x i1> %1, <i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false>
|
|
; Sign-extend each i1 to i16, so a true lane becomes all ones and a false
; lane becomes zero.
%3 = sext <8 x i1> %2 to <8 x i16>
|
|
ret <8 x i16> %3
|
|
}
|