1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 02:52:53 +02:00

[X86][MMX] Optimize MMX shift intrinsics.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D83534
This commit is contained in:
Wang, Pengfei 2020-07-10 14:12:24 +08:00
parent 6ef62dfa4a
commit 87a74f65a7
2 changed files with 42 additions and 0 deletions

View File

@ -25236,6 +25236,9 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
// Clamp out of bounds shift amounts since they will otherwise be masked
// to 8-bits which may make it no longer out of bounds.
unsigned ShiftAmount = C->getAPIntValue().getLimitedValue(255);
if (ShiftAmount == 0)
return Op.getOperand(1);
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op.getValueType(),
Op.getOperand(0), Op.getOperand(1),
DAG.getTargetConstant(ShiftAmount, DL, MVT::i32));

View File

@ -311,6 +311,19 @@ entry:
ret i64 %4
}
define i64 @test72_2(<1 x i64> %a) nounwind readnone optsize ssp {
; ALL-LABEL: @test72_2
; ALL-NOT: psraw
entry:
%0 = bitcast <1 x i64> %a to <4 x i16>
%mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
%1 = tail call x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx %mmx_var.i, i32 0) nounwind
%2 = bitcast x86_mmx %1 to <4 x i16>
%3 = bitcast <4 x i16> %2 to <1 x i64>
%4 = extractelement <1 x i64> %3, i32 0
ret i64 %4
}
declare x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx, i32) nounwind readnone
define i64 @test71(<1 x i64> %a) nounwind readnone optsize ssp {
@ -339,6 +352,19 @@ entry:
ret i64 %4
}
define i64 @test70_2(<1 x i64> %a) nounwind readnone optsize ssp {
; ALL-LABEL: @test70_2
; ALL-NOT: psrld
entry:
%0 = bitcast <1 x i64> %a to <2 x i32>
%mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
%1 = tail call x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx %mmx_var.i, i32 0) nounwind
%2 = bitcast x86_mmx %1 to <2 x i32>
%3 = bitcast <2 x i32> %2 to <1 x i64>
%4 = extractelement <1 x i64> %3, i32 0
ret i64 %4
}
declare x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx, i32) nounwind readnone
define i64 @test69(<1 x i64> %a) nounwind readnone optsize ssp {
@ -397,6 +423,19 @@ entry:
ret i64 %4
}
define i64 @test66_2(<1 x i64> %a) nounwind readnone optsize ssp {
; ALL-LABEL: @test66_2
; ALL-NOT: psllw
entry:
%0 = bitcast <1 x i64> %a to <4 x i16>
%mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
%1 = tail call x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx %mmx_var.i, i32 0) nounwind
%2 = bitcast x86_mmx %1 to <4 x i16>
%3 = bitcast <4 x i16> %2 to <1 x i64>
%4 = extractelement <1 x i64> %3, i32 0
ret i64 %4
}
declare x86_mmx @llvm.x86.mmx.psra.d(x86_mmx, x86_mmx) nounwind readnone
define i64 @test65(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {