mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 19:23:23 +01:00
4b79bb7de1
Extend lowerShuffleWithPACK/matchShuffleWithPACK/createPackShuffleMask to handle compaction-style shuffle masks that can be lowered to chains of PACKSS/PACKUS if their inputs are suitably sign/zero extended. This helps avoid PSHUFB (and its mask load) for short shuffle chains; shuffle combining will still replace the chain with a PSHUFB if there are enough shuffles, as getFauxShuffleMask should recognise the PACKSS/PACKUS chains.
28 lines
912 B
LLVM
28 lines
912 B
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=corei7-avx -mattr=+avx | FileCheck %s
|
|
|
|
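;; Check that FP_TO_SINT and FP_TO_UINT from <4 x double> to <4 x i8> are lowered to a single truncating convert (vcvttpd2dq) followed by pack instructions that narrow the result to bytes.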
|
|
|
|
; Unsigned case: converts <4 x double> to <4 x i8> with fptoui.
; The autogenerated assertions below expect one truncating convert
; (vcvttpd2dq, ymm source to xmm result), then vpackusdw followed by
; vpackuswb to narrow the lanes down to bytes, and a vzeroupper before
; the 32-bit return (retl).
define <4 x i8> @test1(<4 x double> %d) {
|
|
; CHECK-LABEL: test1:
|
|
; CHECK: ## %bb.0:
|
|
; CHECK-NEXT: vcvttpd2dq %ymm0, %xmm0
|
|
; CHECK-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
|
|
; CHECK-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
|
|
; CHECK-NEXT: vzeroupper
|
|
; CHECK-NEXT: retl
|
|
%c = fptoui <4 x double> %d to <4 x i8>
|
|
ret <4 x i8> %c
|
|
}
|
|
; Signed case: converts <4 x double> to <4 x i8> with fptosi.
; Same shape as test1, but the autogenerated assertions below expect the
; signed pack forms (vpackssdw, then vpacksswb) after the truncating
; convert (vcvttpd2dq), again followed by vzeroupper and retl.
define <4 x i8> @test2(<4 x double> %d) {
|
|
; CHECK-LABEL: test2:
|
|
; CHECK: ## %bb.0:
|
|
; CHECK-NEXT: vcvttpd2dq %ymm0, %xmm0
|
|
; CHECK-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
|
|
; CHECK-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
|
|
; CHECK-NEXT: vzeroupper
|
|
; CHECK-NEXT: retl
|
|
%c = fptosi <4 x double> %d to <4 x i8>
|
|
ret <4 x i8> %c
|
|
}
|