1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 18:54:02 +01:00

[X86] Lower to SHLD/SHRD on slow machines for optsize

Use consistent rules for when to lower to SHLD/SHRD on slow machines. This fixes a weird issue where the funnel shift gets expanded, but X86ISelLowering's combineOr then sees the optsize attribute and combines the expansion back into SHLD/SHRD anyway, now with the modulo amount guard left in place.

llvm-svn: 349285
This commit is contained in:
Simon Pilgrim 2018-12-15 19:43:44 +00:00
parent 4ab16cb47f
commit 79a641d507
3 changed files with 31 additions and 81 deletions

View File

@ -17047,9 +17047,9 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget,
SDValue Op1 = Op.getOperand(1); SDValue Op1 = Op.getOperand(1);
SDValue Amt = Op.getOperand(2); SDValue Amt = Op.getOperand(2);
// Expand slow SHLD/SHRD cases. // Expand slow SHLD/SHRD cases if we are not optimizing for size.
// TODO - can we be more selective here: OptSize/RMW etc.? bool OptForSize = DAG.getMachineFunction().getFunction().optForSize();
if (Subtarget.isSHLDSlow()) if (!OptForSize && Subtarget.isSHLDSlow())
return SDValue(); return SDValue();
bool IsFSHR = Op.getOpcode() == ISD::FSHR; bool IsFSHR = Op.getOpcode() == ISD::FSHR;

View File

@ -179,46 +179,21 @@ define i32 @var_shift_i32(i32 %x, i32 %y, i32 %z) nounwind {
} }
define i32 @var_shift_i32_optsize(i32 %x, i32 %y, i32 %z) nounwind optsize { define i32 @var_shift_i32_optsize(i32 %x, i32 %y, i32 %z) nounwind optsize {
; X86-FAST-LABEL: var_shift_i32_optsize: ; X86-LABEL: var_shift_i32_optsize:
; X86-FAST: # %bb.0: ; X86: # %bb.0:
; X86-FAST-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-FAST-NEXT: shldl %cl, %edx, %eax ; X86-NEXT: shldl %cl, %edx, %eax
; X86-FAST-NEXT: retl ; X86-NEXT: retl
; ;
; X86-SLOW-LABEL: var_shift_i32_optsize: ; X64-LABEL: var_shift_i32_optsize:
; X86-SLOW: # %bb.0: ; X64: # %bb.0:
; X86-SLOW-NEXT: pushl %esi ; X64-NEXT: movl %edx, %ecx
; X86-SLOW-NEXT: movb {{[0-9]+}}(%esp), %cl ; X64-NEXT: movl %edi, %eax
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi ; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax ; X64-NEXT: shldl %cl, %esi, %eax
; X86-SLOW-NEXT: movl %eax, %edx ; X64-NEXT: retq
; X86-SLOW-NEXT: shldl %cl, %esi, %edx
; X86-SLOW-NEXT: andb $31, %cl
; X86-SLOW-NEXT: je .LBB3_2
; X86-SLOW-NEXT: # %bb.1:
; X86-SLOW-NEXT: movl %edx, %eax
; X86-SLOW-NEXT: .LBB3_2:
; X86-SLOW-NEXT: popl %esi
; X86-SLOW-NEXT: retl
;
; X64-FAST-LABEL: var_shift_i32_optsize:
; X64-FAST: # %bb.0:
; X64-FAST-NEXT: movl %edx, %ecx
; X64-FAST-NEXT: movl %edi, %eax
; X64-FAST-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-FAST-NEXT: shldl %cl, %esi, %eax
; X64-FAST-NEXT: retq
;
; X64-SLOW-LABEL: var_shift_i32_optsize:
; X64-SLOW: # %bb.0:
; X64-SLOW-NEXT: movl %edx, %ecx
; X64-SLOW-NEXT: movl %edi, %eax
; X64-SLOW-NEXT: shldl %cl, %esi, %eax
; X64-SLOW-NEXT: andb $31, %cl
; X64-SLOW-NEXT: cmovel %edi, %eax
; X64-SLOW-NEXT: retq
%tmp = tail call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z) %tmp = tail call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z)
ret i32 %tmp ret i32 %tmp
} }

View File

@ -178,46 +178,21 @@ define i32 @var_shift_i32(i32 %x, i32 %y, i32 %z) nounwind {
} }
define i32 @var_shift_i32_optsize(i32 %x, i32 %y, i32 %z) nounwind optsize { define i32 @var_shift_i32_optsize(i32 %x, i32 %y, i32 %z) nounwind optsize {
; X86-FAST-LABEL: var_shift_i32_optsize: ; X86-LABEL: var_shift_i32_optsize:
; X86-FAST: # %bb.0: ; X86: # %bb.0:
; X86-FAST-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-FAST-NEXT: shrdl %cl, %edx, %eax ; X86-NEXT: shrdl %cl, %edx, %eax
; X86-FAST-NEXT: retl ; X86-NEXT: retl
; ;
; X86-SLOW-LABEL: var_shift_i32_optsize: ; X64-LABEL: var_shift_i32_optsize:
; X86-SLOW: # %bb.0: ; X64: # %bb.0:
; X86-SLOW-NEXT: pushl %esi ; X64-NEXT: movl %edx, %ecx
; X86-SLOW-NEXT: movb {{[0-9]+}}(%esp), %cl ; X64-NEXT: movl %esi, %eax
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi ; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax ; X64-NEXT: shrdl %cl, %edi, %eax
; X86-SLOW-NEXT: movl %eax, %edx ; X64-NEXT: retq
; X86-SLOW-NEXT: shrdl %cl, %esi, %edx
; X86-SLOW-NEXT: andb $31, %cl
; X86-SLOW-NEXT: je .LBB3_2
; X86-SLOW-NEXT: # %bb.1:
; X86-SLOW-NEXT: movl %edx, %eax
; X86-SLOW-NEXT: .LBB3_2:
; X86-SLOW-NEXT: popl %esi
; X86-SLOW-NEXT: retl
;
; X64-FAST-LABEL: var_shift_i32_optsize:
; X64-FAST: # %bb.0:
; X64-FAST-NEXT: movl %edx, %ecx
; X64-FAST-NEXT: movl %esi, %eax
; X64-FAST-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-FAST-NEXT: shrdl %cl, %edi, %eax
; X64-FAST-NEXT: retq
;
; X64-SLOW-LABEL: var_shift_i32_optsize:
; X64-SLOW: # %bb.0:
; X64-SLOW-NEXT: movl %edx, %ecx
; X64-SLOW-NEXT: movl %esi, %eax
; X64-SLOW-NEXT: shrdl %cl, %edi, %eax
; X64-SLOW-NEXT: andb $31, %cl
; X64-SLOW-NEXT: cmovel %esi, %eax
; X64-SLOW-NEXT: retq
%tmp = tail call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %z) %tmp = tail call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %z)
ret i32 %tmp ret i32 %tmp
} }