Mirror of https://github.com/RPCS3/llvm-mirror.git
Commit 3813edae72
This fixes a regression introduced by revision 268094.

Revision 268094 added the following dag combine rule:
// trunc (shl x, K) -> shl (trunc x), K => K < vt.size / 2

That rule converts a truncate of a shift-by-constant into a shift of a truncated value. We do this only if the shift count is less than half the size in bits of the truncated value (K < vt.size / 2).

The problem is that the constraint on the shift count is incorrect, so the rule doesn't work well in some cases involving vector types. The combine rule should have been written instead like this:
// trunc (shl x, K) -> shl (trunc x), K => K < vt.getScalarSizeInBits()

Basically, if K is smaller than the "scalar size in bits" of the truncated value then we know that by "sinking" the truncate into the operand of the shift we would never accidentally make the shift undefined.

This patch fixes the check on the shift count, and adds test cases to make sure that we don't regress the behavior.

Differential Revision: https://reviews.llvm.org/D24154

llvm-svn: 280482
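To make the corrected condition concrete, here is a small sketch at the LLVM IR level (illustrative only: the actual combine runs on SelectionDAG nodes inside DAGCombiner, and the ".x" value names below are invented for the example). With K = 7 and a truncated element type of i32, K < 32 = vt.getScalarSizeInBits(), so the truncate can safely be sunk below the shift:

; before
;   %shl   = shl <4 x i64> %val, <i64 7, i64 7, i64 7, i64 7>
;   %trunc = trunc <4 x i64> %shl to <4 x i32>
; after (in effect)
;   %trunc.x = trunc <4 x i64> %val to <4 x i32>
;   %shl.x   = shl <4 x i32> %trunc.x, <i32 7, i32 7, i32 7, i32 7>
; For K >= 32 (e.g. the trunc_shl_32_i32_i64 case in the test below) the narrowed
; shift would be undefined, so the combine must not fire. getScalarSizeInBits()
; states the bound per element, whereas the old vt.size bound measures the whole
; vector width, which is presumably why the old check misbehaved for vector types.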
168 lines
4.9 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 | FileCheck %s --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 | FileCheck %s --check-prefix=AVX2

define void @trunc_shl_7_v4i32_v4i64(<4 x i32> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
; SSE2-LABEL: trunc_shl_7_v4i32_v4i64:
; SSE2:       # BB#0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = mem[0,2,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = mem[0,2,2,3]
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE2-NEXT:    pslld $7, %xmm1
; SSE2-NEXT:    movdqa %xmm1, (%rdi)
; SSE2-NEXT:    retq
;
; AVX2-LABEL: trunc_shl_7_v4i32_v4i64:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = mem[0,2,2,3,4,6,6,7]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT:    vpslld $7, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa %xmm0, (%rdi)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
  %val = load <4 x i64>, <4 x i64> addrspace(1)* %in
  %shl = shl <4 x i64> %val, <i64 7, i64 7, i64 7, i64 7>
  %trunc = trunc <4 x i64> %shl to <4 x i32>
  store <4 x i32> %trunc, <4 x i32> addrspace(1)* %out
  ret void
}

define <8 x i16> @trunc_shl_v8i16_v8i32(<8 x i32> %a) {
; SSE2-LABEL: trunc_shl_v8i16_v8i32:
; SSE2:       # BB#0:
; SSE2-NEXT:    pslld $17, %xmm0
; SSE2-NEXT:    pslld $17, %xmm1
; SSE2-NEXT:    pslld $16, %xmm1
; SSE2-NEXT:    psrad $16, %xmm1
; SSE2-NEXT:    pslld $16, %xmm0
; SSE2-NEXT:    psrad $16, %xmm0
; SSE2-NEXT:    packssdw %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: trunc_shl_v8i16_v8i32:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpslld $17, %ymm0, %ymm0
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
  %shl = shl <8 x i32> %a, <i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17>
  %conv = trunc <8 x i32> %shl to <8 x i16>
  ret <8 x i16> %conv
}

define void @trunc_shl_31_i32_i64(i32* %out, i64* %in) {
; SSE2-LABEL: trunc_shl_31_i32_i64:
; SSE2:       # BB#0:
; SSE2-NEXT:    movl (%rsi), %eax
; SSE2-NEXT:    shll $31, %eax
; SSE2-NEXT:    movl %eax, (%rdi)
; SSE2-NEXT:    retq
;
; AVX2-LABEL: trunc_shl_31_i32_i64:
; AVX2:       # BB#0:
; AVX2-NEXT:    movl (%rsi), %eax
; AVX2-NEXT:    shll $31, %eax
; AVX2-NEXT:    movl %eax, (%rdi)
; AVX2-NEXT:    retq
  %val = load i64, i64* %in
  %shl = shl i64 %val, 31
  %trunc = trunc i64 %shl to i32
  store i32 %trunc, i32* %out
  ret void
}

define void @trunc_shl_32_i32_i64(i32* %out, i64* %in) {
; SSE2-LABEL: trunc_shl_32_i32_i64:
; SSE2:       # BB#0:
; SSE2-NEXT:    movl $0, (%rdi)
; SSE2-NEXT:    retq
;
; AVX2-LABEL: trunc_shl_32_i32_i64:
; AVX2:       # BB#0:
; AVX2-NEXT:    movl $0, (%rdi)
; AVX2-NEXT:    retq
  %val = load i64, i64* %in
  %shl = shl i64 %val, 32
  %trunc = trunc i64 %shl to i32
  store i32 %trunc, i32* %out
  ret void
}

define void @trunc_shl_15_i16_i64(i16* %out, i64* %in) {
; SSE2-LABEL: trunc_shl_15_i16_i64:
; SSE2:       # BB#0:
; SSE2-NEXT:    movzwl (%rsi), %eax
; SSE2-NEXT:    shlw $15, %ax
; SSE2-NEXT:    movw %ax, (%rdi)
; SSE2-NEXT:    retq
;
; AVX2-LABEL: trunc_shl_15_i16_i64:
; AVX2:       # BB#0:
; AVX2-NEXT:    movzwl (%rsi), %eax
; AVX2-NEXT:    shlw $15, %ax
; AVX2-NEXT:    movw %ax, (%rdi)
; AVX2-NEXT:    retq
  %val = load i64, i64* %in
  %shl = shl i64 %val, 15
  %trunc = trunc i64 %shl to i16
  store i16 %trunc, i16* %out
  ret void
}

define void @trunc_shl_16_i16_i64(i16* %out, i64* %in) {
; SSE2-LABEL: trunc_shl_16_i16_i64:
; SSE2:       # BB#0:
; SSE2-NEXT:    movw $0, (%rdi)
; SSE2-NEXT:    retq
;
; AVX2-LABEL: trunc_shl_16_i16_i64:
; AVX2:       # BB#0:
; AVX2-NEXT:    movw $0, (%rdi)
; AVX2-NEXT:    retq
  %val = load i64, i64* %in
  %shl = shl i64 %val, 16
  %trunc = trunc i64 %shl to i16
  store i16 %trunc, i16* %out
  ret void
}

define void @trunc_shl_7_i8_i64(i8* %out, i64* %in) {
; SSE2-LABEL: trunc_shl_7_i8_i64:
; SSE2:       # BB#0:
; SSE2-NEXT:    movb (%rsi), %al
; SSE2-NEXT:    shlb $7, %al
; SSE2-NEXT:    movb %al, (%rdi)
; SSE2-NEXT:    retq
;
; AVX2-LABEL: trunc_shl_7_i8_i64:
; AVX2:       # BB#0:
; AVX2-NEXT:    movb (%rsi), %al
; AVX2-NEXT:    shlb $7, %al
; AVX2-NEXT:    movb %al, (%rdi)
; AVX2-NEXT:    retq
  %val = load i64, i64* %in
  %shl = shl i64 %val, 7
  %trunc = trunc i64 %shl to i8
  store i8 %trunc, i8* %out
  ret void
}

define void @trunc_shl_8_i8_i64(i8* %out, i64* %in) {
; SSE2-LABEL: trunc_shl_8_i8_i64:
; SSE2:       # BB#0:
; SSE2-NEXT:    movb $0, (%rdi)
; SSE2-NEXT:    retq
;
; AVX2-LABEL: trunc_shl_8_i8_i64:
; AVX2:       # BB#0:
; AVX2-NEXT:    movb $0, (%rdi)
; AVX2-NEXT:    retq
  %val = load i64, i64* %in
  %shl = shl i64 %val, 8
  %trunc = trunc i64 %shl to i8
  store i8 %trunc, i8* %out
  ret void
}