From 79a641d5077d65e3f02fca1088b8c0a4c56fce33 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Sat, 15 Dec 2018 19:43:44 +0000
Subject: [PATCH] [X86] Lower to SHLD/SHRD on slow machines for optsize

Use consistent rules for when to lower to SHLD/SHRD for slow machines -
this fixes a weird issue where the funnel shift gets expanded, but then
X86ISelLowering's combineOr sees the optsize attribute and combines back
to SHLD/SHRD anyway, now with the modulo amount guard.

llvm-svn: 349285
---
 lib/Target/X86/X86ISelLowering.cpp |  6 ++--
 test/CodeGen/X86/fshl.ll           | 53 ++++++++----------------
 test/CodeGen/X86/fshr.ll           | 53 ++++++++----------------
 3 files changed, 31 insertions(+), 81 deletions(-)

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 3c1e52dec28..72d1fb00fc9 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -17047,9 +17047,9 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget,
   SDValue Op1 = Op.getOperand(1);
   SDValue Amt = Op.getOperand(2);
 
-  // Expand slow SHLD/SHRD cases.
-  // TODO - can we be more selective here: OptSize/RMW etc.?
-  if (Subtarget.isSHLDSlow())
+  // Expand slow SHLD/SHRD cases if we are not optimizing for size.
+  bool OptForSize = DAG.getMachineFunction().getFunction().optForSize();
+  if (!OptForSize && Subtarget.isSHLDSlow())
     return SDValue();
 
   bool IsFSHR = Op.getOpcode() == ISD::FSHR;
diff --git a/test/CodeGen/X86/fshl.ll b/test/CodeGen/X86/fshl.ll
index 1a55f8462fe..ccf451e0451 100644
--- a/test/CodeGen/X86/fshl.ll
+++ b/test/CodeGen/X86/fshl.ll
@@ -179,46 +179,21 @@ define i32 @var_shift_i32(i32 %x, i32 %y, i32 %z) nounwind {
 }
 
 define i32 @var_shift_i32_optsize(i32 %x, i32 %y, i32 %z) nounwind optsize {
-; X86-FAST-LABEL: var_shift_i32_optsize:
-; X86-FAST:       # %bb.0:
-; X86-FAST-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-FAST-NEXT:    shldl %cl, %edx, %eax
-; X86-FAST-NEXT:    retl
+; X86-LABEL: var_shift_i32_optsize:
+; X86:       # %bb.0:
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    shldl %cl, %edx, %eax
+; X86-NEXT:    retl
 ;
-; X86-SLOW-LABEL: var_shift_i32_optsize:
-; X86-SLOW:       # %bb.0:
-; X86-SLOW-NEXT:    pushl %esi
-; X86-SLOW-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-SLOW-NEXT:    movl %eax, %edx
-; X86-SLOW-NEXT:    shldl %cl, %esi, %edx
-; X86-SLOW-NEXT:    andb $31, %cl
-; X86-SLOW-NEXT:    je .LBB3_2
-; X86-SLOW-NEXT:  # %bb.1:
-; X86-SLOW-NEXT:    movl %edx, %eax
-; X86-SLOW-NEXT:  .LBB3_2:
-; X86-SLOW-NEXT:    popl %esi
-; X86-SLOW-NEXT:    retl
-;
-; X64-FAST-LABEL: var_shift_i32_optsize:
-; X64-FAST:       # %bb.0:
-; X64-FAST-NEXT:    movl %edx, %ecx
-; X64-FAST-NEXT:    movl %edi, %eax
-; X64-FAST-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-FAST-NEXT:    shldl %cl, %esi, %eax
-; X64-FAST-NEXT:    retq
-;
-; X64-SLOW-LABEL: var_shift_i32_optsize:
-; X64-SLOW:       # %bb.0:
-; X64-SLOW-NEXT:    movl %edx, %ecx
-; X64-SLOW-NEXT:    movl %edi, %eax
-; X64-SLOW-NEXT:    shldl %cl, %esi, %eax
-; X64-SLOW-NEXT:    andb $31, %cl
-; X64-SLOW-NEXT:    cmovel %edi, %eax
-; X64-SLOW-NEXT:    retq
+; X64-LABEL: var_shift_i32_optsize:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edx, %ecx
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    shldl %cl, %esi, %eax
+; X64-NEXT:    retq
   %tmp = tail call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z)
   ret i32 %tmp
 }
diff --git a/test/CodeGen/X86/fshr.ll b/test/CodeGen/X86/fshr.ll
index 09b8da577fb..09d63b66534 100644
--- a/test/CodeGen/X86/fshr.ll
+++ b/test/CodeGen/X86/fshr.ll
@@ -178,46 +178,21 @@ define i32 @var_shift_i32(i32 %x, i32 %y, i32 %z) nounwind {
 }
 
 define i32 @var_shift_i32_optsize(i32 %x, i32 %y, i32 %z) nounwind optsize {
-; X86-FAST-LABEL: var_shift_i32_optsize:
-; X86-FAST:       # %bb.0:
-; X86-FAST-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-FAST-NEXT:    shrdl %cl, %edx, %eax
-; X86-FAST-NEXT:    retl
+; X86-LABEL: var_shift_i32_optsize:
+; X86:       # %bb.0:
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    shrdl %cl, %edx, %eax
+; X86-NEXT:    retl
 ;
-; X86-SLOW-LABEL: var_shift_i32_optsize:
-; X86-SLOW:       # %bb.0:
-; X86-SLOW-NEXT:    pushl %esi
-; X86-SLOW-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-SLOW-NEXT:    movl %eax, %edx
-; X86-SLOW-NEXT:    shrdl %cl, %esi, %edx
-; X86-SLOW-NEXT:    andb $31, %cl
-; X86-SLOW-NEXT:    je .LBB3_2
-; X86-SLOW-NEXT:  # %bb.1:
-; X86-SLOW-NEXT:    movl %edx, %eax
-; X86-SLOW-NEXT:  .LBB3_2:
-; X86-SLOW-NEXT:    popl %esi
-; X86-SLOW-NEXT:    retl
-;
-; X64-FAST-LABEL: var_shift_i32_optsize:
-; X64-FAST:       # %bb.0:
-; X64-FAST-NEXT:    movl %edx, %ecx
-; X64-FAST-NEXT:    movl %esi, %eax
-; X64-FAST-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-FAST-NEXT:    shrdl %cl, %edi, %eax
-; X64-FAST-NEXT:    retq
-;
-; X64-SLOW-LABEL: var_shift_i32_optsize:
-; X64-SLOW:       # %bb.0:
-; X64-SLOW-NEXT:    movl %edx, %ecx
-; X64-SLOW-NEXT:    movl %esi, %eax
-; X64-SLOW-NEXT:    shrdl %cl, %edi, %eax
-; X64-SLOW-NEXT:    andb $31, %cl
-; X64-SLOW-NEXT:    cmovel %esi, %eax
-; X64-SLOW-NEXT:    retq
+; X64-LABEL: var_shift_i32_optsize:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edx, %ecx
+; X64-NEXT:    movl %esi, %eax
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    shrdl %cl, %edi, %eax
+; X64-NEXT:    retq
   %tmp = tail call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %z)
   ret i32 %tmp
 }
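
For reference (not part of the patch): a minimal C++ sketch of the llvm.fshl.i32
semantics that a single SHLD instruction implements; the helper name fshl32 is
purely illustrative. Because x86 masks the %cl shift count to 5 bits for 32-bit
operands, SHLD already provides the modulo-32 behavior the intrinsic requires,
and a count of 0 leaves the destination unchanged - which is why the
single-instruction lowering needs none of the andb/je or cmovel guards seen in
the removed X86-SLOW/X64-SLOW sequences above.

#include <cstdint>

// Reference semantics of llvm.fshl.i32(x, y, z): concatenate x (high half)
// and y (low half) into a 64-bit value, shift left by z modulo 32, and
// return the high 32 bits. SHLD computes the same thing in one instruction;
// llvm.fshr.i32 / SHRD are the mirror image (shift the pair right and
// return the low half).
uint32_t fshl32(uint32_t x, uint32_t y, uint32_t z) {
  uint64_t wide = ((uint64_t)x << 32) | y; // the x:y funnel, x on top
  unsigned amt = z & 31;                   // modulo-32 amount, like SHLD's masked %cl
  return (uint32_t)((wide << amt) >> 32);  // high 32 bits; amt == 0 yields x
}

For example, fshl32(1, 0x80000000u, 1) == 3 (the top bit of y funnels into the
vacated low bit), and fshl32(x, y, 0) == x with no branch or cmov required.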