mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-24 05:23:45 +02:00
04933872c3
The MOVMSK instructions copy the sign bits of a vector's elements to the low bits of a scalar register and zero the high bits. This patch adds MOVMSK support to SimplifyDemandedUseBits so that it's aware that the upper bits are known to be zero. It also removes the call to MOVMSK if none of the lower bits are actually required and just returns zero. Differential Revision: http://reviews.llvm.org/D19614 llvm-svn: 267873
121 lines
3.5 KiB
LLVM
121 lines
3.5 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
|
; RUN: opt < %s -instcombine -S | FileCheck %s
|
|
|
|
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
|
|
|
;
|
|
; DemandedBits - MOVMSK zeros the upper bits of the result.
|
|
;
|
|
|
|
; Upper-bits case for llvm.x86.sse.movmsk.ps: the result is masked with 15
; (the low 4 bits). The CHECK lines expect the 'and' to be removed so that the
; intrinsic result is returned directly.
define i32 @test_upper_x86_sse_movmsk_ps(<4 x float> %a0) {
; CHECK-LABEL: @test_upper_x86_sse_movmsk_ps(
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0)
; CHECK-NEXT: ret i32 [[TMP1]]
;
  %signmask = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0)
  ; Keep only the low 4 bits of the mask.
  %lowbits = and i32 %signmask, 15
  ret i32 %lowbits
}
|
|
|
|
; Upper-bits case for llvm.x86.sse2.movmsk.pd: the result is masked with 3
; (the low 2 bits). The CHECK lines expect the 'and' to be removed so that the
; intrinsic result is returned directly.
define i32 @test_upper_x86_sse2_movmsk_pd(<2 x double> %a0) {
; CHECK-LABEL: @test_upper_x86_sse2_movmsk_pd(
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0)
; CHECK-NEXT: ret i32 [[TMP1]]
;
  %signmask = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0)
  ; Keep only the low 2 bits of the mask.
  %lowbits = and i32 %signmask, 3
  ret i32 %lowbits
}
|
|
|
|
; Upper-bits case for llvm.x86.sse2.pmovmskb.128: the result is masked with
; 65535 (the low 16 bits). The CHECK lines expect the 'and' to be removed so
; that the intrinsic result is returned directly.
define i32 @test_upper_x86_sse2_pmovmskb_128(<16 x i8> %a0) {
; CHECK-LABEL: @test_upper_x86_sse2_pmovmskb_128(
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0)
; CHECK-NEXT: ret i32 [[TMP1]]
;
  %signmask = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0)
  ; Keep only the low 16 bits of the mask.
  %lowbits = and i32 %signmask, 65535
  ret i32 %lowbits
}
|
|
|
|
; Upper-bits case for llvm.x86.avx.movmsk.ps.256: the result is masked with
; 255 (the low 8 bits). The CHECK lines expect the 'and' to be removed so that
; the intrinsic result is returned directly.
define i32 @test_upper_x86_avx_movmsk_ps_256(<8 x float> %a0) {
; CHECK-LABEL: @test_upper_x86_avx_movmsk_ps_256(
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0)
; CHECK-NEXT: ret i32 [[TMP1]]
;
  %signmask = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0)
  ; Keep only the low 8 bits of the mask.
  %lowbits = and i32 %signmask, 255
  ret i32 %lowbits
}
|
|
|
|
; Upper-bits case for llvm.x86.avx.movmsk.pd.256: the result is masked with 15
; (the low 4 bits). The CHECK lines expect the 'and' to be removed so that the
; intrinsic result is returned directly.
define i32 @test_upper_x86_avx_movmsk_pd_256(<4 x double> %a0) {
; CHECK-LABEL: @test_upper_x86_avx_movmsk_pd_256(
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0)
; CHECK-NEXT: ret i32 [[TMP1]]
;
  %signmask = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0)
  ; Keep only the low 4 bits of the mask.
  %lowbits = and i32 %signmask, 15
  ret i32 %lowbits
}
|
|
|
|
; llvm.x86.avx2.pmovmskb uses the whole of the 32-bit register, so it has no known-zero upper bits to test.
|
|
|
|
;
|
|
; DemandedBits - If we don't use the lower bits then we just return zero.
|
|
;
|
|
|
|
; Lower-bits case for llvm.x86.sse.movmsk.ps: the mask -16 clears the low 4
; bits of the result. The CHECK lines expect the whole function to fold to
; 'ret i32 0' with no intrinsic call left.
define i32 @test_lower_x86_sse_movmsk_ps(<4 x float> %a0) {
; CHECK-LABEL: @test_lower_x86_sse_movmsk_ps(
; CHECK-NEXT: ret i32 0
;
  %signmask = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0)
  ; Discard the low 4 bits, keeping only the upper bits.
  %highbits = and i32 %signmask, -16
  ret i32 %highbits
}
|
|
|
|
; Lower-bits case for llvm.x86.sse2.movmsk.pd: the mask -4 clears the low 2
; bits of the result. The CHECK lines expect the whole function to fold to
; 'ret i32 0' with no intrinsic call left.
define i32 @test_lower_x86_sse2_movmsk_pd(<2 x double> %a0) {
; CHECK-LABEL: @test_lower_x86_sse2_movmsk_pd(
; CHECK-NEXT: ret i32 0
;
  %signmask = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0)
  ; Discard the low 2 bits, keeping only the upper bits.
  %highbits = and i32 %signmask, -4
  ret i32 %highbits
}
|
|
|
|
; Lower-bits case for llvm.x86.sse2.pmovmskb.128: the mask -65536 clears the
; low 16 bits of the result. The CHECK lines expect the whole function to fold
; to 'ret i32 0' with no intrinsic call left.
define i32 @test_lower_x86_sse2_pmovmskb_128(<16 x i8> %a0) {
; CHECK-LABEL: @test_lower_x86_sse2_pmovmskb_128(
; CHECK-NEXT: ret i32 0
;
  %signmask = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0)
  ; Discard the low 16 bits, keeping only the upper bits.
  %highbits = and i32 %signmask, -65536
  ret i32 %highbits
}
|
|
|
|
; Lower-bits case for llvm.x86.avx.movmsk.ps.256: the mask -256 clears the low
; 8 bits of the result. The CHECK lines expect the whole function to fold to
; 'ret i32 0' with no intrinsic call left.
define i32 @test_lower_x86_avx_movmsk_ps_256(<8 x float> %a0) {
; CHECK-LABEL: @test_lower_x86_avx_movmsk_ps_256(
; CHECK-NEXT: ret i32 0
;
  %signmask = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0)
  ; Discard the low 8 bits, keeping only the upper bits.
  %highbits = and i32 %signmask, -256
  ret i32 %highbits
}
|
|
|
|
; Lower-bits case for llvm.x86.avx.movmsk.pd.256: the mask -16 clears the low
; 4 bits of the result. The CHECK lines expect the whole function to fold to
; 'ret i32 0' with no intrinsic call left.
define i32 @test_lower_x86_avx_movmsk_pd_256(<4 x double> %a0) {
; CHECK-LABEL: @test_lower_x86_avx_movmsk_pd_256(
; CHECK-NEXT: ret i32 0
;
  %signmask = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0)
  ; Discard the low 4 bits, keeping only the upper bits.
  %highbits = and i32 %signmask, -16
  ret i32 %highbits
}
|
|
|
|
; llvm.x86.avx2.pmovmskb uses the whole of the 32-bit register, so no non-zero mask can exclude all of its result bits.
|
|
|
|
|
|
; Intrinsics taking 128-bit vector operands (SSE / SSE2).
declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>)
declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>)
declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>)

; Intrinsics taking 256-bit vector operands (AVX / AVX2).
declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>)
declare i32 @llvm.x86.avx.movmsk.pd.256(<4 x double>)
; NOTE(review): declared but not called by any test visible in this file.
declare i32 @llvm.x86.avx2.pmovmskb(<32 x i8>)
|