1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-02-01 05:01:59 +01:00

[X86][AVX2] Ensure on 32-bit targets that we broadcast f64 types not i64 (PR29101)

llvm-svn: 279622
This commit is contained in:
Simon Pilgrim 2016-08-24 12:42:31 +00:00
parent 5441dbe832
commit f39b229a31
2 changed files with 53 additions and 9 deletions

View File

@ -8733,6 +8733,13 @@ static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT,
V = DAG.getBitcast(SrcVT, V);
}
// 32-bit targets need to load i64 as a f64 and then bitcast the result.
if (!Subtarget.is64Bit() && SrcVT == MVT::i64) {
V = DAG.getBitcast(MVT::f64, V);
unsigned NumBroadcastElts = BroadcastVT.getVectorNumElements();
BroadcastVT = MVT::getVectorVT(MVT::f64, NumBroadcastElts);
}
return DAG.getBitcast(VT, DAG.getNode(Opcode, DL, BroadcastVT, V));
}

View File

@ -232,6 +232,43 @@ entry:
ret <4 x i64> %q3
}
; Test added with the PR29101 fix: splat a 64-bit memory operand across a
; 128-bit vector. The <4 x i16> load is 64 bits wide, and the shuffle mask
; <0,1,2,3,0,1,2,3> repeats those four elements twice to fill <8 x i16>.
; Expected codegen for the 32-bit run: the value is loaded as a scalar double
; (vmovsd) and duplicated with vmovddup instead of being broadcast as an i64.
; Expected codegen for the 64-bit run: a single vpbroadcastq from memory.
define <8 x i16> @broadcast_mem_v4i16_v8i16(<4 x i16>* %ptr) {
; X32-LABEL: broadcast_mem_v4i16_v8i16:
; X32: ## BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X32-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; X32-NEXT: retl
;
; X64-LABEL: broadcast_mem_v4i16_v8i16:
; X64: ## BB#0:
; X64-NEXT: vpbroadcastq (%rdi), %xmm0
; X64-NEXT: retq
; Load the 64-bit source vector, then repeat it to form the 128-bit result.
%load = load <4 x i16>, <4 x i16>* %ptr
%shuf = shufflevector <4 x i16> %load, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
ret <8 x i16> %shuf
}
; Splat a 64-bit memory operand across a 256-bit vector. The <4 x i16> load is
; 64 bits wide, and the shuffle mask repeats elements 0..3 four times to fill
; <16 x i16>.
; NOTE(review): both run configurations currently expect the load to be
; widened (vpmovzxwd) and repacked (vpshufb) before a register vpbroadcastq,
; i.e. the load is not folded into the broadcast here, unlike the 128-bit
; variant of this test -- presumably a missed optimization; confirm.
define <16 x i16> @broadcast_mem_v4i16_v16i16(<4 x i16>* %ptr) {
; X32-LABEL: broadcast_mem_v4i16_v16i16:
; X32: ## BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; X32-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; X32-NEXT: vpbroadcastq %xmm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: broadcast_mem_v4i16_v16i16:
; X64: ## BB#0:
; X64-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; X64-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; X64-NEXT: vpbroadcastq %xmm0, %ymm0
; X64-NEXT: retq
; Load the 64-bit source vector, then repeat it to form the 256-bit result.
%load = load <4 x i16>, <4 x i16>* %ptr
%shuf = shufflevector <4 x i16> %load, <4 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
ret <16 x i16> %shuf
}
; FIXME: Pointer adjusted broadcasts
define <16 x i8> @load_splat_16i8_16i8_1111111111111111(<16 x i8>* %ptr) nounwind uwtable readnone ssp {
@ -563,7 +600,7 @@ entry:
define <8 x i32> @V111(<8 x i32> %in) nounwind uwtable readnone ssp {
; X32-LABEL: V111:
; X32: ## BB#0: ## %entry
; X32-NEXT: vpbroadcastd LCPI27_0, %ymm1
; X32-NEXT: vpbroadcastd LCPI29_0, %ymm1
; X32-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; X32-NEXT: retl
;
@ -580,7 +617,7 @@ entry:
define <8 x float> @V113(<8 x float> %in) nounwind uwtable readnone ssp {
; X32-LABEL: V113:
; X32: ## BB#0: ## %entry
; X32-NEXT: vbroadcastss LCPI28_0, %ymm1
; X32-NEXT: vbroadcastss LCPI30_0, %ymm1
; X32-NEXT: vaddps %ymm1, %ymm0, %ymm0
; X32-NEXT: retl
;
@ -597,7 +634,7 @@ entry:
define <4 x float> @_e2(float* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: _e2:
; X32: ## BB#0:
; X32-NEXT: vbroadcastss LCPI29_0, %xmm0
; X32-NEXT: vbroadcastss LCPI31_0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: _e2:
@ -637,25 +674,25 @@ define void @crash() nounwind alwaysinline {
; X32: ## BB#0: ## %WGLoopsEntry
; X32-NEXT: xorl %eax, %eax
; X32-NEXT: testb %al, %al
; X32-NEXT: je LBB31_1
; X32-NEXT: je LBB33_1
; X32-NEXT: ## BB#2: ## %ret
; X32-NEXT: retl
; X32-NEXT: .p2align 4, 0x90
; X32-NEXT: LBB31_1: ## %footer349VF
; X32-NEXT: LBB33_1: ## %footer349VF
; X32-NEXT: ## =>This Inner Loop Header: Depth=1
; X32-NEXT: jmp LBB31_1
; X32-NEXT: jmp LBB33_1
;
; X64-LABEL: crash:
; X64: ## BB#0: ## %WGLoopsEntry
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: testb %al, %al
; X64-NEXT: je LBB31_1
; X64-NEXT: je LBB33_1
; X64-NEXT: ## BB#2: ## %ret
; X64-NEXT: retq
; X64-NEXT: .p2align 4, 0x90
; X64-NEXT: LBB31_1: ## %footer349VF
; X64-NEXT: LBB33_1: ## %footer349VF
; X64-NEXT: ## =>This Inner Loop Header: Depth=1
; X64-NEXT: jmp LBB31_1
; X64-NEXT: jmp LBB33_1
WGLoopsEntry:
br i1 undef, label %ret, label %footer329VF