1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-02-01 05:01:59 +01:00

[X86] Clamp large constant shift amounts for MMX shift intrinsics to 8-bits.

The MMX intrinsics for shift by immediate take a 32-bit shift
amount but the hardware for shifting by immediate only encodes
8-bits. For the intrinsic we don't require the shift amount to
fit in 8-bits in the frontend because we don't check that its an
immediate in the frontend. If its is not an immediate we move it
to an MMX register and use the shift by register.

But if it is an immediate we'll use the shift by immediate
instruction. But we need to change the shift amount to 8-bits.
We were previously doing this accidentally by masking it in the
encoder. But this can make a large shift amount into a small
in bounds shift amount. Instead we should clamp larger shift
amounts to 255 so that the they don't become in bounds.

Fixes PR43922
This commit is contained in:
Craig Topper 2019-11-06 12:39:09 -08:00
parent 60c0901efb
commit 17612b1710
2 changed files with 39 additions and 2 deletions

View File

@ -23636,9 +23636,12 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SDValue ShAmt = Op.getOperand(2);
// If the argument is a constant, convert it to a target constant.
if (auto *C = dyn_cast<ConstantSDNode>(ShAmt)) {
ShAmt = DAG.getTargetConstant(C->getZExtValue(), DL, MVT::i32);
// Clamp out of bounds shift amounts since they will otherwise be masked
// to 8-bits which may make it no longer out of bounds.
unsigned ShiftAmount = C->getAPIntValue().getLimitedValue(255);
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op.getValueType(),
Op.getOperand(0), Op.getOperand(1), ShAmt);
Op.getOperand(0), Op.getOperand(1),
DAG.getTargetConstant(ShiftAmount, DL, MVT::i32));
}
unsigned NewIntrinsic;

View File

@ -647,6 +647,40 @@ entry:
ret void
}
; Make sure we clamp large shift amounts to 255
define i64 @pr43922() {
; X32-LABEL: pr43922:
; X32: # %bb.0: # %entry
; X32-NEXT: pushl %ebp
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: .cfi_offset %ebp, -8
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: .cfi_def_cfa_register %ebp
; X32-NEXT: andl $-8, %esp
; X32-NEXT: subl $8, %esp
; X32-NEXT: movq {{\.LCPI.*}}, %mm0 # mm0 = 0x7AAAAAAA7AAAAAAA
; X32-NEXT: psrad $255, %mm0
; X32-NEXT: movq %mm0, (%esp)
; X32-NEXT: movl (%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: movl %ebp, %esp
; X32-NEXT: popl %ebp
; X32-NEXT: .cfi_def_cfa %esp, 4
; X32-NEXT: retl
;
; X64-LABEL: pr43922:
; X64: # %bb.0: # %entry
; X64-NEXT: movq {{.*}}(%rip), %mm0 # mm0 = 0x7AAAAAAA7AAAAAAA
; X64-NEXT: psrad $255, %mm0
; X64-NEXT: movq %mm0, %rax
; X64-NEXT: retq
entry:
%0 = tail call x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx bitcast (<2 x i32> <i32 2058005162, i32 2058005162> to x86_mmx), i32 268435456)
%1 = bitcast x86_mmx %0 to i64
ret i64 %1
}
declare x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx, i32)
declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx)
declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx)
declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx)