mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 12:12:47 +01:00
[X86] Update TTI to report that v1iX/v1fX types aren't legal for masked gather/scatter/load/store.
The type legalizer tries to scalarize these operations when it encounters them, but it has no support for scalarizing them, which leads to a fatal error. With this change, they are instead scalarized by the mem intrinsic scalarizing pass before SelectionDAG.
llvm-svn: 318380
This commit is contained in:
parent
ec4cab03b9
commit
7346e8250d
@ -2478,6 +2478,9 @@ bool X86TTIImpl::isLSRCostLess(TargetTransformInfo::LSRCost &C1,
|
||||
}
|
||||
|
||||
bool X86TTIImpl::isLegalMaskedLoad(Type *DataTy) {
|
||||
// The backend can't handle a single element vector.
|
||||
if (isa<VectorType>(DataTy) && DataTy->getVectorNumElements() == 1)
|
||||
return false;
|
||||
Type *ScalarTy = DataTy->getScalarType();
|
||||
int DataWidth = isa<PointerType>(ScalarTy) ?
|
||||
DL.getPointerSizeInBits() : ScalarTy->getPrimitiveSizeInBits();
|
||||
@ -2501,8 +2504,13 @@ bool X86TTIImpl::isLegalMaskedGather(Type *DataTy) {
|
||||
// the vector type.
|
||||
// The Scalarizer asks again about legality. It sends a vector type.
|
||||
// In this case we can reject non-power-of-2 vectors.
|
||||
if (isa<VectorType>(DataTy) && !isPowerOf2_32(DataTy->getVectorNumElements()))
|
||||
return false;
|
||||
// We also reject single element vectors as the type legalizer can't
|
||||
// scalarize them.
|
||||
if (isa<VectorType>(DataTy)) {
|
||||
unsigned NumElts = DataTy->getVectorNumElements();
|
||||
if (NumElts == 1 || !isPowerOf2_32(NumElts))
|
||||
return false;
|
||||
}
|
||||
Type *ScalarTy = DataTy->getScalarType();
|
||||
int DataWidth = isa<PointerType>(ScalarTy) ?
|
||||
DL.getPointerSizeInBits() : ScalarTy->getPrimitiveSizeInBits();
|
||||
|
@ -2400,3 +2400,79 @@ define <8 x i32> @test_global_array(<8 x i64> %indxs) {
|
||||
%g = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> %p, i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
|
||||
ret <8 x i32> %g
|
||||
}
|
||||
|
||||
define void @v1_scatter(<1 x i32>%a1, <1 x i32*> %ptr, <1 x i1> %mask) {
|
||||
; KNL_64-LABEL: v1_scatter:
|
||||
; KNL_64: # BB#0:
|
||||
; KNL_64-NEXT: testb $1, %dl
|
||||
; KNL_64-NEXT: jne .LBB42_1
|
||||
; KNL_64-NEXT: # BB#2: # %else
|
||||
; KNL_64-NEXT: retq
|
||||
; KNL_64-NEXT: .LBB42_1: # %cond.store
|
||||
; KNL_64-NEXT: movl %edi, (%rsi)
|
||||
; KNL_64-NEXT: retq
|
||||
;
|
||||
; KNL_32-LABEL: v1_scatter:
|
||||
; KNL_32: # BB#0:
|
||||
; KNL_32-NEXT: testb $1, {{[0-9]+}}(%esp)
|
||||
; KNL_32-NEXT: jne .LBB42_1
|
||||
; KNL_32-NEXT: # BB#2: # %else
|
||||
; KNL_32-NEXT: retl
|
||||
; KNL_32-NEXT: .LBB42_1: # %cond.store
|
||||
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; KNL_32-NEXT: movl %ecx, (%eax)
|
||||
; KNL_32-NEXT: retl
|
||||
;
|
||||
; SKX-LABEL: v1_scatter:
|
||||
; SKX: # BB#0:
|
||||
; SKX-NEXT: testb $1, %dl
|
||||
; SKX-NEXT: jne .LBB42_1
|
||||
; SKX-NEXT: # BB#2: # %else
|
||||
; SKX-NEXT: retq
|
||||
; SKX-NEXT: .LBB42_1: # %cond.store
|
||||
; SKX-NEXT: movl %edi, (%rsi)
|
||||
; SKX-NEXT: retq
|
||||
;
|
||||
; SKX_32-LABEL: v1_scatter:
|
||||
; SKX_32: # BB#0:
|
||||
; SKX_32-NEXT: testb $1, {{[0-9]+}}(%esp)
|
||||
; SKX_32-NEXT: jne .LBB42_1
|
||||
; SKX_32-NEXT: # BB#2: # %else
|
||||
; SKX_32-NEXT: retl
|
||||
; SKX_32-NEXT: .LBB42_1: # %cond.store
|
||||
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; SKX_32-NEXT: movl %ecx, (%eax)
|
||||
; SKX_32-NEXT: retl
|
||||
call void @llvm.masked.scatter.v1i32.v1p0i32(<1 x i32> %a1, <1 x i32*> %ptr, i32 4, <1 x i1> %mask)
|
||||
ret void
|
||||
}
|
||||
declare void @llvm.masked.scatter.v1i32.v1p0i32(<1 x i32>, <1 x i32*>, i32, <1 x i1>)
|
||||
|
||||
define <1 x i32> @v1_gather(<1 x i32*> %ptr, <1 x i1> %mask, <1 x i32> %src0) {
|
||||
; KNL_64-LABEL: v1_gather:
|
||||
; KNL_64: # BB#0:
|
||||
; KNL_64-NEXT: movl (%rdi), %eax
|
||||
; KNL_64-NEXT: retq
|
||||
;
|
||||
; KNL_32-LABEL: v1_gather:
|
||||
; KNL_32: # BB#0:
|
||||
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; KNL_32-NEXT: movl (%eax), %eax
|
||||
; KNL_32-NEXT: retl
|
||||
;
|
||||
; SKX-LABEL: v1_gather:
|
||||
; SKX: # BB#0:
|
||||
; SKX-NEXT: movl (%rdi), %eax
|
||||
; SKX-NEXT: retq
|
||||
;
|
||||
; SKX_32-LABEL: v1_gather:
|
||||
; SKX_32: # BB#0:
|
||||
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; SKX_32-NEXT: movl (%eax), %eax
|
||||
; SKX_32-NEXT: retl
|
||||
%res = call <1 x i32> @llvm.masked.gather.v1i32.v1p0i32(<1 x i32*> %ptr, i32 4, <1 x i1> <i1 true>, <1 x i32> %src0)
|
||||
ret <1 x i32>%res
|
||||
}
|
||||
declare <1 x i32> @llvm.masked.gather.v1i32.v1p0i32(<1 x i32*>, i32, <1 x i1>, <1 x i32>)
|
||||
|
@ -8,6 +8,86 @@
|
||||
; that does not have AVX, but that case should probably be a separate test file using less tests
|
||||
; because it takes over 1.2 seconds to codegen these tests on Haswell 4GHz if there's no maskmov.
|
||||
|
||||
define <1 x double> @loadv1(<1 x i64> %trigger, <1 x double>* %addr, <1 x double> %dst) {
|
||||
; AVX-LABEL: loadv1:
|
||||
; AVX: ## BB#0:
|
||||
; AVX-NEXT: testq %rdi, %rdi
|
||||
; AVX-NEXT: ## implicit-def: %XMM1
|
||||
; AVX-NEXT: je LBB0_1
|
||||
; AVX-NEXT: ## BB#2: ## %else
|
||||
; AVX-NEXT: testq %rdi, %rdi
|
||||
; AVX-NEXT: jne LBB0_3
|
||||
; AVX-NEXT: LBB0_4: ## %else
|
||||
; AVX-NEXT: vmovaps %xmm1, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
; AVX-NEXT: LBB0_1: ## %cond.load
|
||||
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX-NEXT: testq %rdi, %rdi
|
||||
; AVX-NEXT: je LBB0_4
|
||||
; AVX-NEXT: LBB0_3: ## %else
|
||||
; AVX-NEXT: vmovaps %xmm0, %xmm1
|
||||
; AVX-NEXT: vmovaps %xmm1, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; AVX512F-LABEL: loadv1:
|
||||
; AVX512F: ## BB#0:
|
||||
; AVX512F-NEXT: testq %rdi, %rdi
|
||||
; AVX512F-NEXT: ## implicit-def: %XMM1
|
||||
; AVX512F-NEXT: jne LBB0_2
|
||||
; AVX512F-NEXT: ## BB#1: ## %cond.load
|
||||
; AVX512F-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX512F-NEXT: LBB0_2: ## %else
|
||||
; AVX512F-NEXT: testq %rdi, %rdi
|
||||
; AVX512F-NEXT: sete %al
|
||||
; AVX512F-NEXT: kmovw %eax, %k1
|
||||
; AVX512F-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1}
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: loadv1:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: testq %rdi, %rdi
|
||||
; SKX-NEXT: ## implicit-def: %XMM1
|
||||
; SKX-NEXT: jne LBB0_2
|
||||
; SKX-NEXT: ## BB#1: ## %cond.load
|
||||
; SKX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
|
||||
; SKX-NEXT: LBB0_2: ## %else
|
||||
; SKX-NEXT: testq %rdi, %rdi
|
||||
; SKX-NEXT: sete %al
|
||||
; SKX-NEXT: kmovd %eax, %k1
|
||||
; SKX-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1}
|
||||
; SKX-NEXT: retq
|
||||
%mask = icmp eq <1 x i64> %trigger, zeroinitializer
|
||||
%res = call <1 x double> @llvm.masked.load.v1f64.p0v1f64(<1 x double>* %addr, i32 4, <1 x i1>%mask, <1 x double>%dst)
|
||||
ret <1 x double> %res
|
||||
}
|
||||
declare <1 x double> @llvm.masked.load.v1f64.p0v1f64(<1 x double>*, i32, <1 x i1>, <1 x double>)
|
||||
|
||||
define void @storev1(<1 x i32> %trigger, <1 x i32>* %addr, <1 x i32> %val) {
|
||||
; AVX-LABEL: storev1:
|
||||
; AVX: ## BB#0:
|
||||
; AVX-NEXT: testl %edi, %edi
|
||||
; AVX-NEXT: je LBB1_1
|
||||
; AVX-NEXT: ## BB#2: ## %else
|
||||
; AVX-NEXT: retq
|
||||
; AVX-NEXT: LBB1_1: ## %cond.store
|
||||
; AVX-NEXT: movl %edx, (%rsi)
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: storev1:
|
||||
; AVX512: ## BB#0:
|
||||
; AVX512-NEXT: testl %edi, %edi
|
||||
; AVX512-NEXT: je LBB1_1
|
||||
; AVX512-NEXT: ## BB#2: ## %else
|
||||
; AVX512-NEXT: retq
|
||||
; AVX512-NEXT: LBB1_1: ## %cond.store
|
||||
; AVX512-NEXT: movl %edx, (%rsi)
|
||||
; AVX512-NEXT: retq
|
||||
%mask = icmp eq <1 x i32> %trigger, zeroinitializer
|
||||
call void @llvm.masked.store.v1i32.p0v1i32(<1 x i32>%val, <1 x i32>* %addr, i32 4, <1 x i1>%mask)
|
||||
ret void
|
||||
}
|
||||
declare void @llvm.masked.store.v1i32.p0v1i32(<1 x i32>, <1 x i32>*, i32, <1 x i1>)
|
||||
|
||||
define <2 x double> @test6(<2 x i64> %trigger, <2 x double>* %addr, <2 x double> %dst) {
|
||||
; AVX-LABEL: test6:
|
||||
; AVX: ## BB#0:
|
||||
|
Loading…
Reference in New Issue
Block a user