Mirror of https://github.com/RPCS3/llvm-mirror.git, synced 2024-11-01 08:23:21 +01:00
f372d2334f
Codegen was failing with an assertion because of unexpected vector operands when legalizing the selection DAG for a MUL instruction. The asserting code was legalizing multiplies for 128-bit vectors. It uses a custom lowering to try to detect cases where it can use a VMULL instruction instead of a VMOVL + VMUL. The code was looking for input operands to the MUL that had been sign- or zero-extended; if it found such operands, it would drop the sign/zero extension and use the original vector size as input to a VMULL instruction.

The code assumed that the original input vector was 64 bits wide, so that after dropping the extension it would fit directly into a D register and could be used as an operand of a VMULL instruction. The input code that triggered the failure used a vector of <4 x i8> that was sign-extended to <4 x i32>. It was not safe to drop the sign extension in this case because the original vector is only 32 bits wide. The fix is to insert a sign extension that widens the vector to the required 64 bits; in this particular example, the vector needs to be sign-extended to <4 x i16>.

llvm-svn: 169024
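A minimal sketch of the triggering pattern, written here for illustration only (the function and value names below are made up; the committed regression tests follow in the file itself): a <4 x i8> value is sign-extended to <4 x i32> and multiplied, so once the extensions are dropped the operands are only 32 bits wide and must first be widened to <4 x i16> to fill a D register before a vmull.s16 can be selected.

; Illustrative IR, not part of the committed test file.
define <4 x i32> @mul_sext_v4i8(<4 x i8>* %p, <4 x i8>* %q) nounwind {
entry:
  %a = load <4 x i8>* %p, align 4
  %b = load <4 x i8>* %q, align 4
  ; Each operand of the mul is a sign-extended <4 x i8>: only 32 bits wide
  ; before extension, so it cannot feed VMULL directly.
  %sa = sext <4 x i8> %a to <4 x i32>
  %sb = sext <4 x i8> %b to <4 x i32>
  ; With the fix, lowering first sign-extends the narrow operands to <4 x i16>
  ; (64 bits, a D register) and then uses vmull.s16 for this multiply.
  %m = mul <4 x i32> %sa, %sb
  ret <4 x i32> %m
}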
151 lines
3.6 KiB
LLVM
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s

; PR12281
; Test generation of code for vmull instruction when multiplying 128-bit
; vectors that were created by sign-extending smaller vector sizes.
;
; The vmull operation requires 64-bit vectors, so we must extend the original
; vector size to 64 bits for vmull operation.
; Previously failed with an assertion because the <4 x i8> vector was too small
; for vmull.

; Vector x Constant
; v4i8
;
define void @sextload_v4i8_c(<4 x i8>* %v) nounwind {
;CHECK: sextload_v4i8_c:
entry:
  %0 = load <4 x i8>* %v, align 8
  %v0 = sext <4 x i8> %0 to <4 x i32>
;CHECK: vmull
  %v1 = mul <4 x i32> %v0, <i32 3, i32 3, i32 3, i32 3>
  store <4 x i32> %v1, <4 x i32>* undef, align 8
  ret void;
}

; v2i8
;
define void @sextload_v2i8_c(<2 x i8>* %v) nounwind {
;CHECK: sextload_v2i8_c:
entry:
  %0 = load <2 x i8>* %v, align 8
  %v0 = sext <2 x i8> %0 to <2 x i64>
;CHECK: vmull
  %v1 = mul <2 x i64> %v0, <i64 3, i64 3>
  store <2 x i64> %v1, <2 x i64>* undef, align 8
  ret void;
}

; v2i16
;
define void @sextload_v2i16_c(<2 x i16>* %v) nounwind {
;CHECK: sextload_v2i16_c:
entry:
  %0 = load <2 x i16>* %v, align 8
  %v0 = sext <2 x i16> %0 to <2 x i64>
;CHECK: vmull
  %v1 = mul <2 x i64> %v0, <i64 3, i64 3>
  store <2 x i64> %v1, <2 x i64>* undef, align 8
  ret void;
}


; Vector x Vector
; v4i8
;
define void @sextload_v4i8_v(<4 x i8>* %v, <4 x i8>* %p) nounwind {
;CHECK: sextload_v4i8_v:
entry:
  %0 = load <4 x i8>* %v, align 8
  %v0 = sext <4 x i8> %0 to <4 x i32>

  %1 = load <4 x i8>* %p, align 8
  %v2 = sext <4 x i8> %1 to <4 x i32>
;CHECK: vmull
  %v1 = mul <4 x i32> %v0, %v2
  store <4 x i32> %v1, <4 x i32>* undef, align 8
  ret void;
}

; v2i8
;
define void @sextload_v2i8_v(<2 x i8>* %v, <2 x i8>* %p) nounwind {
;CHECK: sextload_v2i8_v:
entry:
  %0 = load <2 x i8>* %v, align 8
  %v0 = sext <2 x i8> %0 to <2 x i64>

  %1 = load <2 x i8>* %p, align 8
  %v2 = sext <2 x i8> %1 to <2 x i64>
;CHECK: vmull
  %v1 = mul <2 x i64> %v0, %v2
  store <2 x i64> %v1, <2 x i64>* undef, align 8
  ret void;
}

; v2i16
;
define void @sextload_v2i16_v(<2 x i16>* %v, <2 x i16>* %p) nounwind {
;CHECK: sextload_v2i16_v:
entry:
  %0 = load <2 x i16>* %v, align 8
  %v0 = sext <2 x i16> %0 to <2 x i64>

  %1 = load <2 x i16>* %p, align 8
  %v2 = sext <2 x i16> %1 to <2 x i64>
;CHECK: vmull
  %v1 = mul <2 x i64> %v0, %v2
  store <2 x i64> %v1, <2 x i64>* undef, align 8
  ret void;
}


; Vector(small) x Vector(big)
; v4i8 x v4i16
;
define void @sextload_v4i8_vs(<4 x i8>* %v, <4 x i16>* %p) nounwind {
;CHECK: sextload_v4i8_vs:
entry:
  %0 = load <4 x i8>* %v, align 8
  %v0 = sext <4 x i8> %0 to <4 x i32>

  %1 = load <4 x i16>* %p, align 8
  %v2 = sext <4 x i16> %1 to <4 x i32>
;CHECK: vmull
  %v1 = mul <4 x i32> %v0, %v2
  store <4 x i32> %v1, <4 x i32>* undef, align 8
  ret void;
}

; v2i8
; v2i8 x v2i16
define void @sextload_v2i8_vs(<2 x i8>* %v, <2 x i16>* %p) nounwind {
;CHECK: sextload_v2i8_vs:
entry:
  %0 = load <2 x i8>* %v, align 8
  %v0 = sext <2 x i8> %0 to <2 x i64>

  %1 = load <2 x i16>* %p, align 8
  %v2 = sext <2 x i16> %1 to <2 x i64>
;CHECK: vmull
  %v1 = mul <2 x i64> %v0, %v2
  store <2 x i64> %v1, <2 x i64>* undef, align 8
  ret void;
}

; v2i16
; v2i16 x v2i32
define void @sextload_v2i16_vs(<2 x i16>* %v, <2 x i32>* %p) nounwind {
;CHECK: sextload_v2i16_vs:
entry:
  %0 = load <2 x i16>* %v, align 8
  %v0 = sext <2 x i16> %0 to <2 x i64>

  %1 = load <2 x i32>* %p, align 8
  %v2 = sext <2 x i32> %1 to <2 x i64>
;CHECK: vmull
  %v1 = mul <2 x i64> %v0, %v2
  store <2 x i64> %v1, <2 x i64>* undef, align 8
  ret void;
}