From a3c6b40b1aae6cf859fb82568326b30a8e1354ce Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Fri, 24 Feb 2017 17:17:33 +0000 Subject: [PATCH] [DAGCombiner] add missing folds for scalar select of {-1,0,1} The motivation for filling out these select-of-constants cases goes back to D24480, where we discussed removing an IR fold from add(zext) --> select. And that goes back to: https://reviews.llvm.org/rL75531 https://reviews.llvm.org/rL159230 The idea is that we should always canonicalize patterns like this to a select-of-constants in IR because that's the smallest IR and the best for value tracking. Note that we currently do the opposite in some cases (like the cases in *this* patch). I.e., the proposed folds in this patch already exist in InstCombine today: https://github.com/llvm-mirror/llvm/blob/master/lib/Transforms/InstCombine/InstCombineSelect.cpp#L1151 As this patch shows, most targets generate better machine code for simple ext/add/not ops rather than a select of constants. So the follow-up steps to make this less of a patchwork of special-case folds and missing IR canonicalization: 1. Have DAGCombiner convert any select of constants into ext/add/not ops. 2. Have InstCombine canonicalize in the other direction (create more selects). 
Differential Revision: https://reviews.llvm.org/D30180 llvm-svn: 296137 --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 35 +++++++++++-- test/CodeGen/AMDGPU/trunc.ll | 12 ++--- test/CodeGen/ARM/select_const.ll | 35 ++++--------- test/CodeGen/Hexagon/adde.ll | 9 ++-- test/CodeGen/Hexagon/sube.ll | 8 ++- test/CodeGen/NVPTX/add-128bit.ll | 2 +- test/CodeGen/PowerPC/select_const.ll | 65 ++++-------------------- test/CodeGen/X86/select_const.ll | 18 +++---- 8 files changed, 72 insertions(+), 112 deletions(-) diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 415046c3dbc..f395966d676 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -5599,8 +5599,13 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) { if (!isa(N1) || !isa(N2)) return SDValue(); - // TODO: We should handle other cases of selecting between {-1,0,1} here. - if (CondVT == MVT::i1) { + // Only do this before legalization to avoid conflicting with target-specific + // transforms in the other direction (create a select from a zext/sext). There + // is also a target-independent combine here in DAGCombiner in the other + // direction for (select Cond, -1, 0) when the condition is not i1. 
+ // TODO: This could be generalized for any 2 constants that differ by 1: + // add ({s/z}ext Cond), C + if (CondVT == MVT::i1 && !LegalOperations) { if (isNullConstant(N1) && isOneConstant(N2)) { // select Cond, 0, 1 --> zext (!Cond) SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1); @@ -5608,6 +5613,25 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) { NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond); return NotCond; } + if (isNullConstant(N1) && isAllOnesConstant(N2)) { + // select Cond, 0, -1 --> sext (!Cond) + SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1); + if (VT != MVT::i1) + NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond); + return NotCond; + } + if (isOneConstant(N1) && isNullConstant(N2)) { + // select Cond, 1, 0 --> zext (Cond) + if (VT != MVT::i1) + Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond); + return Cond; + } + if (isAllOnesConstant(N1) && isNullConstant(N2)) { + // select Cond, -1, 0 --> sext (Cond) + if (VT != MVT::i1) + Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond); + return Cond; + } return SDValue(); } @@ -6766,7 +6790,12 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { if (!VT.isVector()) { EVT SetCCVT = getSetCCResultType(N00VT); - if (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT)) { + // Don't do this transform for i1 because there's a select transform + // that would reverse it. + // TODO: We should not do this transform at all without a target hook + // because a sext is likely cheaper than a select? 
+ if (SetCCVT.getScalarSizeInBits() != 1 && + (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) { SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC); return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero); } diff --git a/test/CodeGen/AMDGPU/trunc.ll b/test/CodeGen/AMDGPU/trunc.ll index 2c2ce4c5d35..a8f10cc5a9c 100644 --- a/test/CodeGen/AMDGPU/trunc.ll +++ b/test/CodeGen/AMDGPU/trunc.ll @@ -56,8 +56,7 @@ define void @trunc_shl_i64(i64 addrspace(1)* %out2, i32 addrspace(1)* %out, i64 } ; GCN-LABEL: {{^}}trunc_i32_to_i1: -; GCN: v_and_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}} -; GCN: v_cmp_eq_u32 +; GCN: v_and_b32_e32 [[VREG:v[0-9]+]], 1, v{{[0-9]+}} define void @trunc_i32_to_i1(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) { %a = load i32, i32 addrspace(1)* %ptr, align 4 %trunc = trunc i32 %a to i1 @@ -67,8 +66,7 @@ define void @trunc_i32_to_i1(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) { } ; GCN-LABEL: {{^}}trunc_i8_to_i1: -; GCN: v_and_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}} -; GCN: v_cmp_eq_u32 +; GCN: v_and_b32_e32 [[VREG:v[0-9]+]], 1, v{{[0-9]+}} define void @trunc_i8_to_i1(i8 addrspace(1)* %out, i8 addrspace(1)* %ptr) { %a = load i8, i8 addrspace(1)* %ptr, align 4 %trunc = trunc i8 %a to i1 @@ -78,8 +76,7 @@ define void @trunc_i8_to_i1(i8 addrspace(1)* %out, i8 addrspace(1)* %ptr) { } ; GCN-LABEL: {{^}}sgpr_trunc_i16_to_i1: -; GCN: s_and_b32 s{{[0-9]+}}, 1, s{{[0-9]+}} -; GCN: v_cmp_eq_u32 +; GCN: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 1 define void @sgpr_trunc_i16_to_i1(i16 addrspace(1)* %out, i16 %a) { %trunc = trunc i16 %a to i1 %result = select i1 %trunc, i16 1, i16 0 @@ -88,8 +85,7 @@ define void @sgpr_trunc_i16_to_i1(i16 addrspace(1)* %out, i16 %a) { } ; GCN-LABEL: {{^}}sgpr_trunc_i32_to_i1: -; GCN: s_and_b32 s{{[0-9]+}}, 1, s{{[0-9]+}} -; GCN: v_cmp_eq_u32 +; GCN: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 1 define void @sgpr_trunc_i32_to_i1(i32 addrspace(1)* %out, i32 %a) { %trunc = trunc i32 %a to i1 %result = select i1 %trunc, i32 1, i32 0 diff 
--git a/test/CodeGen/ARM/select_const.ll b/test/CodeGen/ARM/select_const.ll index 018cb5dda58..9f4cb6fa4d5 100644 --- a/test/CodeGen/ARM/select_const.ll +++ b/test/CodeGen/ARM/select_const.ll @@ -40,8 +40,7 @@ define i32 @select_0_or_1_signext(i1 signext %cond) { define i32 @select_1_or_0(i1 %cond) { ; CHECK-LABEL: select_1_or_0: ; CHECK: @ BB#0: -; CHECK-NEXT: ands r0, r0, #1 -; CHECK-NEXT: movne r0, #1 +; CHECK-NEXT: and r0, r0, #1 ; CHECK-NEXT: mov pc, lr %sel = select i1 %cond, i32 1, i32 0 ret i32 %sel @@ -50,8 +49,6 @@ define i32 @select_1_or_0(i1 %cond) { define i32 @select_1_or_0_zeroext(i1 zeroext %cond) { ; CHECK-LABEL: select_1_or_0_zeroext: ; CHECK: @ BB#0: -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: movne r0, #1 ; CHECK-NEXT: mov pc, lr %sel = select i1 %cond, i32 1, i32 0 ret i32 %sel @@ -60,8 +57,7 @@ define i32 @select_1_or_0_zeroext(i1 zeroext %cond) { define i32 @select_1_or_0_signext(i1 signext %cond) { ; CHECK-LABEL: select_1_or_0_signext: ; CHECK: @ BB#0: -; CHECK-NEXT: ands r0, r0, #1 -; CHECK-NEXT: movne r0, #1 +; CHECK-NEXT: and r0, r0, #1 ; CHECK-NEXT: mov pc, lr %sel = select i1 %cond, i32 1, i32 0 ret i32 %sel @@ -72,10 +68,9 @@ define i32 @select_1_or_0_signext(i1 signext %cond) { define i32 @select_0_or_neg1(i1 %cond) { ; CHECK-LABEL: select_0_or_neg1: ; CHECK: @ BB#0: -; CHECK-NEXT: mvn r1, #0 -; CHECK-NEXT: tst r0, #1 -; CHECK-NEXT: movne r1, #0 -; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: mov r1, #1 +; CHECK-NEXT: bic r0, r1, r0 +; CHECK-NEXT: rsb r0, r0, #0 ; CHECK-NEXT: mov pc, lr %sel = select i1 %cond, i32 0, i32 -1 ret i32 %sel @@ -84,10 +79,8 @@ define i32 @select_0_or_neg1(i1 %cond) { define i32 @select_0_or_neg1_zeroext(i1 zeroext %cond) { ; CHECK-LABEL: select_0_or_neg1_zeroext: ; CHECK: @ BB#0: -; CHECK-NEXT: mvn r1, #0 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: movne r1, #0 -; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: eor r0, r0, #1 +; CHECK-NEXT: rsb r0, r0, #0 ; CHECK-NEXT: mov pc, lr %sel = select i1 %cond, i32 0, i32 -1 ret i32 
%sel @@ -96,10 +89,7 @@ define i32 @select_0_or_neg1_zeroext(i1 zeroext %cond) { define i32 @select_0_or_neg1_signext(i1 signext %cond) { ; CHECK-LABEL: select_0_or_neg1_signext: ; CHECK: @ BB#0: -; CHECK-NEXT: mvn r1, #0 -; CHECK-NEXT: tst r0, #1 -; CHECK-NEXT: movne r1, #0 -; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: mvn r0, r0 ; CHECK-NEXT: mov pc, lr %sel = select i1 %cond, i32 0, i32 -1 ret i32 %sel @@ -110,8 +100,8 @@ define i32 @select_0_or_neg1_signext(i1 signext %cond) { define i32 @select_neg1_or_0(i1 %cond) { ; CHECK-LABEL: select_neg1_or_0: ; CHECK: @ BB#0: -; CHECK-NEXT: ands r0, r0, #1 -; CHECK-NEXT: mvnne r0, #0 +; CHECK-NEXT: and r0, r0, #1 +; CHECK-NEXT: rsb r0, r0, #0 ; CHECK-NEXT: mov pc, lr %sel = select i1 %cond, i32 -1, i32 0 ret i32 %sel @@ -120,8 +110,7 @@ define i32 @select_neg1_or_0(i1 %cond) { define i32 @select_neg1_or_0_zeroext(i1 zeroext %cond) { ; CHECK-LABEL: select_neg1_or_0_zeroext: ; CHECK: @ BB#0: -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mvnne r0, #0 +; CHECK-NEXT: rsb r0, r0, #0 ; CHECK-NEXT: mov pc, lr %sel = select i1 %cond, i32 -1, i32 0 ret i32 %sel @@ -130,8 +119,6 @@ define i32 @select_neg1_or_0_zeroext(i1 zeroext %cond) { define i32 @select_neg1_or_0_signext(i1 signext %cond) { ; CHECK-LABEL: select_neg1_or_0_signext: ; CHECK: @ BB#0: -; CHECK-NEXT: ands r0, r0, #1 -; CHECK-NEXT: mvnne r0, #0 ; CHECK-NEXT: mov pc, lr %sel = select i1 %cond, i32 -1, i32 0 ret i32 %sel diff --git a/test/CodeGen/Hexagon/adde.ll b/test/CodeGen/Hexagon/adde.ll index 67594ad03be..5af3b071cd0 100644 --- a/test/CodeGen/Hexagon/adde.ll +++ b/test/CodeGen/Hexagon/adde.ll @@ -1,13 +1,12 @@ ; RUN: llc -march=hexagon -disable-hsdr -hexagon-expand-condsets=0 -hexagon-bit=0 -disable-post-ra < %s | FileCheck %s -; CHECK: r{{[0-9]+:[0-9]+}} = combine(#0,#1) -; CHECK: r{{[0-9]+:[0-9]+}} = combine(#0,#0) ; CHECK: r{{[0-9]+:[0-9]+}} = add(r{{[0-9]+:[0-9]+}},r{{[0-9]+:[0-9]+}}) +; CHECK: r{{[0-9]+:[0-9]+}} = combine(#0,#1) ; CHECK: p{{[0-9]+}} = 
cmp.gtu(r{{[0-9]+:[0-9]+}},r{{[0-9]+:[0-9]+}}) ; CHECK: p{{[0-9]+}} = cmp.gtu(r{{[0-9]+:[0-9]+}},r{{[0-9]+:[0-9]+}}) -; CHECK: r{{[0-9]+}} = mux(p{{[0-9]+}},r{{[0-9]+}},r{{[0-9]+}}) -; CHECK: r{{[0-9]+}} = mux(p{{[0-9]+}},r{{[0-9]+}},r{{[0-9]+}}) -; CHECK: r{{[0-9]+:[0-9]+}} = combine(r{{[0-9]+}},r{{[0-9]+}}) +; CHECK: r{{[0-9]+}} = mux(p{{[0-9]+}},#1,#0) +; CHECK: r{{[0-9]+:[0-9]+}} = combine(#0,r{{[0-9]+}}) +; CHECK: r{{[0-9]+:[0-9]+}} = add(r{{[0-9]+:[0-9]+}},r{{[0-9]+:[0-9]+}}) ; CHECK: r{{[0-9]+}} = mux(p{{[0-9]+}},r{{[0-9]+}},r{{[0-9]+}}) ; CHECK: r{{[0-9]+}} = mux(p{{[0-9]+}},r{{[0-9]+}},r{{[0-9]+}}) ; CHECK: r{{[0-9]+:[0-9]+}} = combine(r{{[0-9]+}},r{{[0-9]+}}) diff --git a/test/CodeGen/Hexagon/sube.ll b/test/CodeGen/Hexagon/sube.ll index 861f361a2c5..bd0da980cf7 100644 --- a/test/CodeGen/Hexagon/sube.ll +++ b/test/CodeGen/Hexagon/sube.ll @@ -1,13 +1,11 @@ ; RUN: llc -march=hexagon -disable-hsdr -hexagon-expand-condsets=0 -hexagon-bit=0 -disable-post-ra < %s | FileCheck %s -; CHECK: r{{[0-9]+:[0-9]+}} = combine(#0,#0) -; CHECK: r{{[0-9]+:[0-9]+}} = combine(#0,#1) ; CHECK: p{{[0-9]+}} = cmp.gtu(r{{[0-9]+:[0-9]+}},r{{[0-9]+:[0-9]+}}) ; CHECK: r{{[0-9]+:[0-9]+}} = sub(r{{[0-9]+:[0-9]+}},r{{[0-9]+:[0-9]+}}) -; CHECK: r{{[0-9]+}} = mux(p{{[0-9]+}},r{{[0-9]+}},r{{[0-9]+}}) -; CHECK: r{{[0-9]+}} = mux(p{{[0-9]+}},r{{[0-9]+}},r{{[0-9]+}}) +; CHECK: r{{[0-9]+}} = mux(p{{[0-9]+}},#1,#0 +; CHECK: r{{[0-9]+:[0-9]+}} = sub(r{{[0-9]+:[0-9]+}},r{{[0-9]+:[0-9]+}}) +; CHECK: r{{[0-9]+:[0-9]+}} = combine(#0,r{{[0-9]+}}) ; CHECK: r{{[0-9]+:[0-9]+}} = sub(r{{[0-9]+:[0-9]+}},r{{[0-9]+:[0-9]+}}) -; CHECK: r{{[0-9]+:[0-9]+}} = combine(r{{[0-9]+}},r{{[0-9]+}}) define void @check_sube_subc(i64 %AL, i64 %AH, i64 %BL, i64 %BH, i64* %RL, i64* %RH) { entry: diff --git a/test/CodeGen/NVPTX/add-128bit.ll b/test/CodeGen/NVPTX/add-128bit.ll index 29e3cdffae7..a077c3fcf89 100644 --- a/test/CodeGen/NVPTX/add-128bit.ll +++ b/test/CodeGen/NVPTX/add-128bit.ll @@ -8,7 +8,7 @@ define void 
@foo(i64 %a, i64 %add, i128* %retptr) { ; CHECK: add.s64 ; CHECK: setp.lt.u64 ; CHECK: setp.lt.u64 -; CHECK: selp.b64 +; CHECK: selp.u64 ; CHECK: selp.b64 ; CHECK: add.s64 %t1 = sext i64 %a to i128 diff --git a/test/CodeGen/PowerPC/select_const.ll b/test/CodeGen/PowerPC/select_const.ll index 77d0e420c58..ebf664ee10b 100644 --- a/test/CodeGen/PowerPC/select_const.ll +++ b/test/CodeGen/PowerPC/select_const.ll @@ -39,70 +39,27 @@ define i32 @select_0_or_1_signext(i1 signext %cond) { ; select Cond, 1, 0 --> zext (Cond) define i32 @select_1_or_0(i1 %cond) { -; ISEL-LABEL: select_1_or_0: -; ISEL: # BB#0: -; ISEL-NEXT: andi. 3, 3, 1 -; ISEL-NEXT: li 4, 1 -; ISEL-NEXT: li 3, 0 -; ISEL-NEXT: isel 3, 4, 3, 1 -; ISEL-NEXT: blr -; -; NO_ISEL-LABEL: select_1_or_0: -; NO_ISEL: # BB#0: -; NO_ISEL-NEXT: andi. 3, 3, 1 -; NO_ISEL-NEXT: li 4, 1 -; NO_ISEL-NEXT: li 3, 0 -; NO_ISEL-NEXT: bc 12, 1, .LBB3_1 -; NO_ISEL-NEXT: blr -; NO_ISEL-NEXT: .LBB3_1: -; NO_ISEL-NEXT: addi 3, 4, 0 -; NO_ISEL-NEXT: blr +; ALL-LABEL: select_1_or_0: +; ALL: # BB#0: +; ALL-NEXT: clrldi 3, 3, 63 +; ALL-NEXT: blr %sel = select i1 %cond, i32 1, i32 0 ret i32 %sel } define i32 @select_1_or_0_zeroext(i1 zeroext %cond) { -; ISEL-LABEL: select_1_or_0_zeroext: -; ISEL: # BB#0: -; ISEL-NEXT: andi. 3, 3, 1 -; ISEL-NEXT: li 4, 1 -; ISEL-NEXT: li 3, 0 -; ISEL-NEXT: isel 3, 4, 3, 1 -; ISEL-NEXT: blr -; -; NO_ISEL-LABEL: select_1_or_0_zeroext: -; NO_ISEL: # BB#0: -; NO_ISEL-NEXT: andi. 3, 3, 1 -; NO_ISEL-NEXT: li 4, 1 -; NO_ISEL-NEXT: li 3, 0 -; NO_ISEL-NEXT: bc 12, 1, .LBB4_1 -; NO_ISEL-NEXT: blr -; NO_ISEL-NEXT: .LBB4_1: -; NO_ISEL-NEXT: addi 3, 4, 0 -; NO_ISEL-NEXT: blr +; ALL-LABEL: select_1_or_0_zeroext: +; ALL: # BB#0: +; ALL-NEXT: blr %sel = select i1 %cond, i32 1, i32 0 ret i32 %sel } define i32 @select_1_or_0_signext(i1 signext %cond) { -; ISEL-LABEL: select_1_or_0_signext: -; ISEL: # BB#0: -; ISEL-NEXT: andi. 
3, 3, 1 -; ISEL-NEXT: li 4, 1 -; ISEL-NEXT: li 3, 0 -; ISEL-NEXT: isel 3, 4, 3, 1 -; ISEL-NEXT: blr -; -; NO_ISEL-LABEL: select_1_or_0_signext: -; NO_ISEL: # BB#0: -; NO_ISEL-NEXT: andi. 3, 3, 1 -; NO_ISEL-NEXT: li 4, 1 -; NO_ISEL-NEXT: li 3, 0 -; NO_ISEL-NEXT: bc 12, 1, .LBB5_1 -; NO_ISEL-NEXT: blr -; NO_ISEL-NEXT: .LBB5_1: -; NO_ISEL-NEXT: addi 3, 4, 0 -; NO_ISEL-NEXT: blr +; ALL-LABEL: select_1_or_0_signext: +; ALL: # BB#0: +; ALL-NEXT: clrldi 3, 3, 63 +; ALL-NEXT: blr %sel = select i1 %cond, i32 1, i32 0 ret i32 %sel } diff --git a/test/CodeGen/X86/select_const.ll b/test/CodeGen/X86/select_const.ll index eb9abcbdcb8..82054a314a5 100644 --- a/test/CodeGen/X86/select_const.ll +++ b/test/CodeGen/X86/select_const.ll @@ -108,10 +108,9 @@ define i32 @select_0_or_neg1_signext(i1 signext %cond) { define i32 @select_neg1_or_0(i1 %cond) { ; CHECK-LABEL: select_neg1_or_0: ; CHECK: # BB#0: -; CHECK-NEXT: xorl %ecx, %ecx -; CHECK-NEXT: testb $1, %dil -; CHECK-NEXT: movl $-1, %eax -; CHECK-NEXT: cmovel %ecx, %eax +; CHECK-NEXT: andl $1, %edi +; CHECK-NEXT: negl %edi +; CHECK-NEXT: movl %edi, %eax ; CHECK-NEXT: retq %sel = select i1 %cond, i32 -1, i32 0 ret i32 %sel @@ -120,10 +119,8 @@ define i32 @select_neg1_or_0(i1 %cond) { define i32 @select_neg1_or_0_zeroext(i1 zeroext %cond) { ; CHECK-LABEL: select_neg1_or_0_zeroext: ; CHECK: # BB#0: -; CHECK-NEXT: xorl %ecx, %ecx -; CHECK-NEXT: testb %dil, %dil -; CHECK-NEXT: movl $-1, %eax -; CHECK-NEXT: cmovel %ecx, %eax +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: negl %eax ; CHECK-NEXT: retq %sel = select i1 %cond, i32 -1, i32 0 ret i32 %sel @@ -132,10 +129,7 @@ define i32 @select_neg1_or_0_zeroext(i1 zeroext %cond) { define i32 @select_neg1_or_0_signext(i1 signext %cond) { ; CHECK-LABEL: select_neg1_or_0_signext: ; CHECK: # BB#0: -; CHECK-NEXT: xorl %ecx, %ecx -; CHECK-NEXT: testb $1, %dil -; CHECK-NEXT: movl $-1, %eax -; CHECK-NEXT: cmovel %ecx, %eax +; CHECK-NEXT: movsbl %dil, %eax ; CHECK-NEXT: retq %sel = select i1 %cond, 
i32 -1, i32 0 ret i32 %sel