[X86] Lower to SHLD/SHRD on slow machines for optsize
Use consistent rules for when to lower to SHLD/SHRD on slow machines. This fixes a weird issue where the funnel shift would get expanded, but X86ISelLowering's combineOr would then see the optsize attribute and combine the expansion back to SHLD/SHRD anyway, now burdened with a redundant modulo-amount guard.

llvm-svn: 349285
parent 4ab16cb47f
commit 79a641d507
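For context: llvm.fshl/llvm.fshr take the shift amount modulo the bit width, which is exactly the behavior of x86's SHLD/SHRD instructions. A minimal C++ sketch of the 32-bit left funnel shift that the tests below exercise (illustrative only; the function name is ours, not the patch's):

#include <cstdint>

// Reference semantics of llvm.fshl.i32: shift the 64-bit concatenation x:y
// left by z % 32 and return the high 32 bits; SHLD implements this directly.
uint32_t fshl32(uint32_t x, uint32_t y, uint32_t z) {
  unsigned amt = z & 31;                   // amount is implicitly modulo 32
  if (amt == 0)
    return x;                              // also sidesteps the UB shift y >> 32
  return (x << amt) | (y >> (32 - amt));
}

The amt == 0 case is the "modulo amount guard" the message refers to: an expanded shift/or sequence needs an explicit select for it, while SHLD/SHRD get it for free.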
lib/Target/X86/X86ISelLowering.cpp
@@ -17047,9 +17047,9 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget,
   SDValue Op1 = Op.getOperand(1);
   SDValue Amt = Op.getOperand(2);
 
-  // Expand slow SHLD/SHRD cases.
-  // TODO - can we be more selective here: OptSize/RMW etc.?
-  if (Subtarget.isSHLDSlow())
+  // Expand slow SHLD/SHRD cases if we are not optimizing for size.
+  bool OptForSize = DAG.getMachineFunction().getFunction().optForSize();
+  if (!OptForSize && Subtarget.isSHLDSlow())
     return SDValue();
 
   bool IsFSHR = Op.getOpcode() == ISD::FSHR;
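Before this patch, optsize functions on slow-SHLD targets effectively took both paths: LowerFunnelShift expanded the node, and combineOr then folded the expansion back into SHLD/SHRD while keeping the expansion's zero-amount select. A rough C++ model of the old codegen (shld32 is a hypothetical helper standing in for the instruction, not an LLVM API):

#include <cstdint>

// Models the SHLD instruction: the count is masked to 5 bits, and a masked
// count of zero leaves the destination untouched.
uint32_t shld32(uint32_t x, uint32_t y, uint32_t z) {
  unsigned amt = z & 31;
  return amt ? (x << amt) | (y >> (32 - amt)) : x;
}

// Pre-patch optsize output, roughly: SHLD plus a leftover guard (the
// andb $31, %cl / cmovel pair in the old checks below), even though
// shld32 already returns x when the masked amount is zero.
uint32_t fshl32_pre_patch(uint32_t x, uint32_t y, uint32_t z) {
  uint32_t r = shld32(x, y, z);
  return (z & 31) == 0 ? x : r; // redundant select
}

With the optForSize check above, SHLD/SHRD is formed up front for optsize and the redundant guard never appears, as the test updates below show.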
test/CodeGen/X86/fshl.ll
@@ -179,46 +179,21 @@ define i32 @var_shift_i32(i32 %x, i32 %y, i32 %z) nounwind {
 }
 
 define i32 @var_shift_i32_optsize(i32 %x, i32 %y, i32 %z) nounwind optsize {
-; X86-FAST-LABEL: var_shift_i32_optsize:
-; X86-FAST:       # %bb.0:
-; X86-FAST-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-FAST-NEXT:    shldl %cl, %edx, %eax
-; X86-FAST-NEXT:    retl
+; X86-LABEL: var_shift_i32_optsize:
+; X86:       # %bb.0:
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    shldl %cl, %edx, %eax
+; X86-NEXT:    retl
 ;
-; X86-SLOW-LABEL: var_shift_i32_optsize:
-; X86-SLOW:       # %bb.0:
-; X86-SLOW-NEXT:    pushl %esi
-; X86-SLOW-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-SLOW-NEXT:    movl %eax, %edx
-; X86-SLOW-NEXT:    shldl %cl, %esi, %edx
-; X86-SLOW-NEXT:    andb $31, %cl
-; X86-SLOW-NEXT:    je .LBB3_2
-; X86-SLOW-NEXT:  # %bb.1:
-; X86-SLOW-NEXT:    movl %edx, %eax
-; X86-SLOW-NEXT:  .LBB3_2:
-; X86-SLOW-NEXT:    popl %esi
-; X86-SLOW-NEXT:    retl
-;
-; X64-FAST-LABEL: var_shift_i32_optsize:
-; X64-FAST:       # %bb.0:
-; X64-FAST-NEXT:    movl %edx, %ecx
-; X64-FAST-NEXT:    movl %edi, %eax
-; X64-FAST-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-FAST-NEXT:    shldl %cl, %esi, %eax
-; X64-FAST-NEXT:    retq
-;
-; X64-SLOW-LABEL: var_shift_i32_optsize:
-; X64-SLOW:       # %bb.0:
-; X64-SLOW-NEXT:    movl %edx, %ecx
-; X64-SLOW-NEXT:    movl %edi, %eax
-; X64-SLOW-NEXT:    shldl %cl, %esi, %eax
-; X64-SLOW-NEXT:    andb $31, %cl
-; X64-SLOW-NEXT:    cmovel %edi, %eax
-; X64-SLOW-NEXT:    retq
+; X64-LABEL: var_shift_i32_optsize:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edx, %ecx
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    shldl %cl, %esi, %eax
+; X64-NEXT:    retq
   %tmp = tail call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z)
   ret i32 %tmp
 }
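The fshr.ll updates mirror the fshl.ll ones. For reference, the right funnel shift's scalar semantics, in the same sketch style as above (again not code from the patch):

#include <cstdint>

// Reference semantics of llvm.fshr.i32: shift the concatenation x:y right
// by z % 32 and return the low 32 bits; SHRD implements this directly.
uint32_t fshr32(uint32_t x, uint32_t y, uint32_t z) {
  unsigned amt = z & 31;
  if (amt == 0)
    return y;                              // low word passes through unchanged
  return (x << (32 - amt)) | (y >> amt);
}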
test/CodeGen/X86/fshr.ll
@@ -178,46 +178,21 @@ define i32 @var_shift_i32(i32 %x, i32 %y, i32 %z) nounwind {
 }
 
 define i32 @var_shift_i32_optsize(i32 %x, i32 %y, i32 %z) nounwind optsize {
-; X86-FAST-LABEL: var_shift_i32_optsize:
-; X86-FAST:       # %bb.0:
-; X86-FAST-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-FAST-NEXT:    shrdl %cl, %edx, %eax
-; X86-FAST-NEXT:    retl
+; X86-LABEL: var_shift_i32_optsize:
+; X86:       # %bb.0:
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    shrdl %cl, %edx, %eax
+; X86-NEXT:    retl
 ;
-; X86-SLOW-LABEL: var_shift_i32_optsize:
-; X86-SLOW:       # %bb.0:
-; X86-SLOW-NEXT:    pushl %esi
-; X86-SLOW-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-SLOW-NEXT:    movl %eax, %edx
-; X86-SLOW-NEXT:    shrdl %cl, %esi, %edx
-; X86-SLOW-NEXT:    andb $31, %cl
-; X86-SLOW-NEXT:    je .LBB3_2
-; X86-SLOW-NEXT:  # %bb.1:
-; X86-SLOW-NEXT:    movl %edx, %eax
-; X86-SLOW-NEXT:  .LBB3_2:
-; X86-SLOW-NEXT:    popl %esi
-; X86-SLOW-NEXT:    retl
-;
-; X64-FAST-LABEL: var_shift_i32_optsize:
-; X64-FAST:       # %bb.0:
-; X64-FAST-NEXT:    movl %edx, %ecx
-; X64-FAST-NEXT:    movl %esi, %eax
-; X64-FAST-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-FAST-NEXT:    shrdl %cl, %edi, %eax
-; X64-FAST-NEXT:    retq
-;
-; X64-SLOW-LABEL: var_shift_i32_optsize:
-; X64-SLOW:       # %bb.0:
-; X64-SLOW-NEXT:    movl %edx, %ecx
-; X64-SLOW-NEXT:    movl %esi, %eax
-; X64-SLOW-NEXT:    shrdl %cl, %edi, %eax
-; X64-SLOW-NEXT:    andb $31, %cl
-; X64-SLOW-NEXT:    cmovel %esi, %eax
-; X64-SLOW-NEXT:    retq
+; X64-LABEL: var_shift_i32_optsize:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edx, %ecx
+; X64-NEXT:    movl %esi, %eax
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    shrdl %cl, %edi, %eax
+; X64-NEXT:    retq
   %tmp = tail call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %z)
   ret i32 %tmp
 }