mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 04:02:41 +01:00
b70fffa971
ISD::VSELECT mask uses 1 to identify the first argument and 0 to identify the second argument. On the other hand, BLENDI uses 0 to identify the first argument and 1 to identify the second argument. Fix the generation of the blend mask to account for this difference. The bug did not show up with r209043, because we were not checking for the actual arguments of the blend instruction! This commit also fixes the test cases. Note: The same mask works for the BLENDr variant because the arguments are swapped during instruction selection (see the BLENDXXrr patterns). <rdar://problem/16975435> llvm-svn: 209324
41 lines
1.4 KiB
LLVM
41 lines
1.4 KiB
LLVM
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 -mattr=+sse4.1 | FileCheck %s
|
|
|
|
|
|
; Verify that we produce movss instead of blendvps when possible.
|
|
|
|
;CHECK-LABEL: vsel_float:
|
|
;CHECK-NOT: blend
|
|
;CHECK: movss
|
|
;CHECK: ret
|
|
; Constant-mask select on <4 x float>: lane 0 is taken from %v1 and
; lanes 1-3 are taken from %v2.
define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
  %res = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x float> %v1, <4 x float> %v2
  ret <4 x float> %res
}
|
|
|
|
;CHECK-LABEL: vsel_4xi8:
|
|
;CHECK-NOT: blend
|
|
;CHECK: movss
|
|
;CHECK: ret
|
|
; Constant-mask select on <4 x i8>: lane 0 is taken from %v1 and
; lanes 1-3 are taken from %v2.
define <4 x i8> @vsel_4xi8(<4 x i8> %v1, <4 x i8> %v2) {
  %res = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i8> %v1, <4 x i8> %v2
  ret <4 x i8> %res
}
|
|
|
|
;CHECK-LABEL: vsel_8xi16:
|
|
; The select mask is
|
|
; <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>
|
|
; which translates into the boolean mask (most significant bit first, bit i = lane i):
|
|
; 00010001 = 17.
|
|
; '1' selects the first argument, '0' selects the second argument.
|
|
; This is the opposite of the Intel blend immediate encoding, thus we expect
|
|
; the inverted mask: 11101110 = 238.
|
|
; According to the ABI:
|
|
; v1 is in xmm0 => first argument is xmm0.
|
|
; v2 is in xmm1 => second argument is xmm1.
|
|
;CHECK: pblendw $238, %xmm1, %xmm0
|
|
;CHECK: ret
|
|
; Constant-mask select on <8 x i16>: lanes 0 and 4 are taken from %v1 and
; every other lane is taken from %v2.
define <8 x i16> @vsel_8xi16(<8 x i16> %v1, <8 x i16> %v2) {
  %res = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x i16> %v1, <8 x i16> %v2
  ret <8 x i16> %res
}
|