1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-24 13:33:37 +02:00
llvm-mirror/test/Transforms/InstCombine/x86-movmsk.ll
Simon Pilgrim 04933872c3 [InstCombine][SSE] Add MOVMSK support to SimplifyDemandedUseBits
The MOVMSK instructions copy the sign bit of each vector element to the low bits of a scalar register and zero the high bits.

This patch adds MOVMSK support to SimplifyDemandedUseBits so that it is aware that the upper bits are known to be zero. It also removes the call to MOVMSK if none of the lower bits are actually required and just returns zero.

Differential Revision: http://reviews.llvm.org/D19614

llvm-svn: 267873
2016-04-28 12:22:53 +00:00

121 lines
3.5 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s
; Data layout string, read per the LLVM LangRef syntax: little-endian (e),
; ELF name mangling (m:e), i64 aligned to 64 bits, f80 aligned to 128 bits,
; native integer widths of 8/16/32/64 bits, and 128-bit stack alignment (S128).
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
;
; DemandedBits - MOVMSK zeros the upper bits of the result.
;
define i32 @test_upper_x86_sse_movmsk_ps(<4 x float> %a0) {
; CHECK-LABEL: @test_upper_x86_sse_movmsk_ps(
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0)
; CHECK-NEXT: ret i32 [[TMP1]]
;
  ; The mask 15 keeps the low 4 bits (one per <4 x float> lane). The expected
  ; output above returns the intrinsic result directly, with the 'and' removed.
  %signbits = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0)
  %low4 = and i32 %signbits, 15
  ret i32 %low4
}
define i32 @test_upper_x86_sse2_movmsk_pd(<2 x double> %a0) {
; CHECK-LABEL: @test_upper_x86_sse2_movmsk_pd(
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0)
; CHECK-NEXT: ret i32 [[TMP1]]
;
  ; The mask 3 keeps the low 2 bits (one per <2 x double> lane). The expected
  ; output above returns the intrinsic result directly, with the 'and' removed.
  %signbits = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0)
  %low2 = and i32 %signbits, 3
  ret i32 %low2
}
define i32 @test_upper_x86_sse2_pmovmskb_128(<16 x i8> %a0) {
; CHECK-LABEL: @test_upper_x86_sse2_pmovmskb_128(
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0)
; CHECK-NEXT: ret i32 [[TMP1]]
;
  ; The mask 65535 keeps the low 16 bits (one per <16 x i8> lane). The expected
  ; output above returns the intrinsic result directly, with the 'and' removed.
  %signbits = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0)
  %low16 = and i32 %signbits, 65535
  ret i32 %low16
}
define i32 @test_upper_x86_avx_movmsk_ps_256(<8 x float> %a0) {
; CHECK-LABEL: @test_upper_x86_avx_movmsk_ps_256(
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0)
; CHECK-NEXT: ret i32 [[TMP1]]
;
  ; The mask 255 keeps the low 8 bits (one per <8 x float> lane). The expected
  ; output above returns the intrinsic result directly, with the 'and' removed.
  %signbits = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0)
  %low8 = and i32 %signbits, 255
  ret i32 %low8
}
define i32 @test_upper_x86_avx_movmsk_pd_256(<4 x double> %a0) {
; CHECK-LABEL: @test_upper_x86_avx_movmsk_pd_256(
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0)
; CHECK-NEXT: ret i32 [[TMP1]]
;
  ; The mask 15 keeps the low 4 bits (one per <4 x double> lane). The expected
  ; output above returns the intrinsic result directly, with the 'and' removed.
  %signbits = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0)
  %low4 = and i32 %signbits, 15
  ret i32 %low4
}
; llvm.x86.avx2.pmovmskb uses the whole of the 32-bit register, so it has no
; known-zero upper bits and therefore no test in this group.
;
; DemandedBits - If we don't use the lower bits then we just return zero.
;
define i32 @test_lower_x86_sse_movmsk_ps(<4 x float> %a0) {
; CHECK-LABEL: @test_lower_x86_sse_movmsk_ps(
; CHECK-NEXT: ret i32 0
;
  ; The mask -16 clears the low 4 bits (one per <4 x float> lane) and keeps only
  ; the bits above them. The expected output above is a constant zero with no
  ; intrinsic call.
  %signbits = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0)
  %upper = and i32 %signbits, -16
  ret i32 %upper
}
define i32 @test_lower_x86_sse2_movmsk_pd(<2 x double> %a0) {
; CHECK-LABEL: @test_lower_x86_sse2_movmsk_pd(
; CHECK-NEXT: ret i32 0
;
  ; The mask -4 clears the low 2 bits (one per <2 x double> lane) and keeps only
  ; the bits above them. The expected output above is a constant zero with no
  ; intrinsic call.
  %signbits = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0)
  %upper = and i32 %signbits, -4
  ret i32 %upper
}
define i32 @test_lower_x86_sse2_pmovmskb_128(<16 x i8> %a0) {
; CHECK-LABEL: @test_lower_x86_sse2_pmovmskb_128(
; CHECK-NEXT: ret i32 0
;
  ; The mask -65536 clears the low 16 bits (one per <16 x i8> lane) and keeps
  ; only the bits above them. The expected output above is a constant zero with
  ; no intrinsic call.
  %signbits = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0)
  %upper = and i32 %signbits, -65536
  ret i32 %upper
}
define i32 @test_lower_x86_avx_movmsk_ps_256(<8 x float> %a0) {
; CHECK-LABEL: @test_lower_x86_avx_movmsk_ps_256(
; CHECK-NEXT: ret i32 0
;
  ; The mask -256 clears the low 8 bits (one per <8 x float> lane) and keeps
  ; only the bits above them. The expected output above is a constant zero with
  ; no intrinsic call.
  %signbits = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0)
  %upper = and i32 %signbits, -256
  ret i32 %upper
}
define i32 @test_lower_x86_avx_movmsk_pd_256(<4 x double> %a0) {
; CHECK-LABEL: @test_lower_x86_avx_movmsk_pd_256(
; CHECK-NEXT: ret i32 0
;
  ; The mask -16 clears the low 4 bits (one per <4 x double> lane) and keeps
  ; only the bits above them. The expected output above is a constant zero with
  ; no intrinsic call.
  %signbits = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0)
  %upper = and i32 %signbits, -16
  ret i32 %upper
}
; llvm.x86.avx2.pmovmskb uses the whole of the 32-bit register, so no mask can
; select only bits above its result and there is no test for it in this group.
; Declarations of the x86 MOVMSK-family intrinsics referenced by this file.
; Each takes a single vector operand and returns an i32.
declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>)
declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>)
declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>)
declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>)
declare i32 @llvm.x86.avx.movmsk.pd.256(<4 x double>)
; Declared, but not called by any test function visible in this file.
declare i32 @llvm.x86.avx2.pmovmskb(<32 x i8>)