From c04a5f29d7b0255dd206bea2d4c7aea42f7ef374 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sat, 3 Jun 2017 18:50:26 +0000 Subject: [PATCH] [InstCombine][InstSimplify] Add various tests for ctlz/cttz with vectors, some showing missed optimizations. NFC llvm-svn: 304667 --- test/Transforms/InstCombine/intrinsics.ll | 160 ++++++++++++++++++++++ test/Transforms/InstSimplify/call.ll | 11 ++ 2 files changed, 171 insertions(+) diff --git a/test/Transforms/InstCombine/intrinsics.ll b/test/Transforms/InstCombine/intrinsics.ll index 280098840ba..04b9088e362 100644 --- a/test/Transforms/InstCombine/intrinsics.ll +++ b/test/Transforms/InstCombine/intrinsics.ll @@ -21,6 +21,7 @@ declare <2 x i32> @llvm.cttz.v2i32(<2 x i32>, i1) nounwind readnone declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1) nounwind readnone declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>) nounwind readnone declare i8 @llvm.ctlz.i8(i8, i1) nounwind readnone +declare <2 x i8> @llvm.ctlz.v2i8(<2 x i8>, i1) nounwind readnone declare double @llvm.cos.f64(double %Val) nounwind readonly declare double @llvm.sin.f64(double %Val) nounwind readonly declare double @llvm.floor.f64(double %Val) nounwind readonly @@ -282,6 +283,19 @@ define i32 @cttz(i32 %a) { ret i32 %count } +define <2 x i32> @cttz_vec(<2 x i32> %a) { +; CHECK-LABEL: @cttz_vec( +; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[A:%.*]], <i32 8, i32 8> +; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[OR]], <i32 -8, i32 -8> +; CHECK-NEXT: [[COUNT:%.*]] = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[AND]], i1 true) +; CHECK-NEXT: ret <2 x i32> [[COUNT]] +; + %or = or <2 x i32> %a, <i32 8, i32 8> + %and = and <2 x i32> %or, <i32 -8, i32 -8> + %count = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %and, i1 true) nounwind readnone + ret <2 x i32> %count +} + define i1 @cttz_knownbits(i32 %arg) { ; CHECK-LABEL: @cttz_knownbits( ; CHECK-NEXT: ret i1 false @@ -292,6 +306,16 @@ define i1 @cttz_knownbits(i32 %arg) { ret i1 %res } +define <2 x i1> @cttz_knownbits_vec(<2 x i32> %arg) { +; CHECK-LABEL: @cttz_knownbits_vec( +;
CHECK-NEXT: ret <2 x i1> zeroinitializer +; + %or = or <2 x i32> %arg, <i32 4, i32 4> + %cnt = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %or, i1 true) nounwind readnone + %res = icmp eq <2 x i32> %cnt, <i32 4, i32 4> + ret <2 x i1> %res +} + define i1 @cttz_knownbits2(i32 %arg) { ; CHECK-LABEL: @cttz_knownbits2( ; CHECK-NEXT: [[OR:%.*]] = or i32 [[ARG:%.*]], 4 @@ -305,6 +329,19 @@ define i1 @cttz_knownbits2(i32 %arg) { ret i1 %res } +define <2 x i1> @cttz_knownbits2_vec(<2 x i32> %arg) { +; CHECK-LABEL: @cttz_knownbits2_vec( +; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[ARG:%.*]], <i32 4, i32 4> +; CHECK-NEXT: [[CNT:%.*]] = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[OR]], i1 true) +; CHECK-NEXT: [[RES:%.*]] = icmp eq <2 x i32> [[CNT]], <i32 2, i32 2> +; CHECK-NEXT: ret <2 x i1> [[RES]] +; + %or = or <2 x i32> %arg, <i32 4, i32 4> + %cnt = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %or, i1 true) nounwind readnone + %res = icmp eq <2 x i32> %cnt, <i32 2, i32 2> + ret <2 x i1> %res +} + ; TODO: The icmp is unnecessary given the known bits of the input. define i1 @cttz_knownbits3(i32 %arg) { ; CHECK-LABEL: @cttz_knownbits3( @@ -319,6 +356,20 @@ define i1 @cttz_knownbits3(i32 %arg) { ret i1 %res } +; TODO: The icmp is unnecessary given the known bits of the input.
+define <2 x i1> @cttz_knownbits3_vec(<2 x i32> %arg) { +; CHECK-LABEL: @cttz_knownbits3_vec( +; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[ARG:%.*]], <i32 4, i32 4> +; CHECK-NEXT: [[CNT:%.*]] = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[OR]], i1 true) +; CHECK-NEXT: [[RES:%.*]] = icmp eq <2 x i32> [[CNT]], <i32 3, i32 3> +; CHECK-NEXT: ret <2 x i1> [[RES]] +; + %or = or <2 x i32> %arg, <i32 4, i32 4> + %cnt = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %or, i1 true) nounwind readnone + %res = icmp eq <2 x i32> %cnt, <i32 3, i32 3> + ret <2 x i1> %res +} + define i8 @ctlz(i8 %a) { ; CHECK-LABEL: @ctlz( ; CHECK-NEXT: ret i8 2 @@ -329,6 +380,19 @@ define i8 @ctlz(i8 %a) { ret i8 %count } +define <2 x i8> @ctlz_vec(<2 x i8> %a) { +; CHECK-LABEL: @ctlz_vec( +; CHECK-NEXT: [[OR:%.*]] = or <2 x i8> [[A:%.*]], <i8 32, i8 32> +; CHECK-NEXT: [[AND:%.*]] = and <2 x i8> [[OR]], <i8 63, i8 63> +; CHECK-NEXT: [[COUNT:%.*]] = tail call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> [[AND]], i1 true) +; CHECK-NEXT: ret <2 x i8> [[COUNT]] +; + %or = or <2 x i8> %a, <i8 32, i8 32> + %and = and <2 x i8> %or, <i8 63, i8 63> + %count = tail call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> %and, i1 true) nounwind readnone + ret <2 x i8> %count +} + define i1 @ctlz_knownbits(i8 %arg) { ; CHECK-LABEL: @ctlz_knownbits( ; CHECK-NEXT: ret i1 false @@ -339,6 +403,16 @@ define i1 @ctlz_knownbits(i8 %arg) { ret i1 %res } +define <2 x i1> @ctlz_knownbits_vec(<2 x i8> %arg) { +; CHECK-LABEL: @ctlz_knownbits_vec( +; CHECK-NEXT: ret <2 x i1> zeroinitializer +; + %or = or <2 x i8> %arg, <i8 32, i8 32> + %cnt = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> %or, i1 true) nounwind readnone + %res = icmp eq <2 x i8> %cnt, <i8 4, i8 4> + ret <2 x i1> %res +} + define i1 @ctlz_knownbits2(i8 %arg) { ; CHECK-LABEL: @ctlz_knownbits2( ; CHECK-NEXT: [[OR:%.*]] = or i8 [[ARG:%.*]], 32 @@ -352,6 +426,19 @@ define i1 @ctlz_knownbits2(i8 %arg) { ret i1 %res } +define <2 x i1> @ctlz_knownbits2_vec(<2 x i8> %arg) { +; CHECK-LABEL: @ctlz_knownbits2_vec( +; CHECK-NEXT: [[OR:%.*]] = or <2 x i8> [[ARG:%.*]], <i8 32, i8 32> +; CHECK-NEXT: [[CNT:%.*]] = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> [[OR]], i1 true) +;
CHECK-NEXT: [[RES:%.*]] = icmp eq <2 x i8> [[CNT]], <i8 2, i8 2> +; CHECK-NEXT: ret <2 x i1> [[RES]] +; + %or = or <2 x i8> %arg, <i8 32, i8 32> + %cnt = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> %or, i1 true) nounwind readnone + %res = icmp eq <2 x i8> %cnt, <i8 2, i8 2> + ret <2 x i1> %res +} + ; TODO: The icmp is unnecessary given the known bits of the input. define i1 @ctlz_knownbits3(i8 %arg) { ; CHECK-LABEL: @ctlz_knownbits3( @@ -366,6 +453,20 @@ define i1 @ctlz_knownbits3(i8 %arg) { ret i1 %res } +; TODO: The icmp is unnecessary given the known bits of the input. +define <2 x i1> @ctlz_knownbits3_vec(<2 x i8> %arg) { +; CHECK-LABEL: @ctlz_knownbits3_vec( +; CHECK-NEXT: [[OR:%.*]] = or <2 x i8> [[ARG:%.*]], <i8 32, i8 32> +; CHECK-NEXT: [[CNT:%.*]] = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> [[OR]], i1 true) +; CHECK-NEXT: [[RES:%.*]] = icmp eq <2 x i8> [[CNT]], <i8 3, i8 3> +; CHECK-NEXT: ret <2 x i1> [[RES]] +; + %or = or <2 x i8> %arg, <i8 32, i8 32> + %cnt = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> %or, i1 true) nounwind readnone + %res = icmp eq <2 x i8> %cnt, <i8 3, i8 3> + ret <2 x i1> %res +} + define void @cmp.simplify(i32 %a, i32 %b, i1* %c) { %lz = tail call i32 @llvm.ctlz.i32(i32 %a, i1 false) nounwind readnone %lz.cmp = icmp eq i32 %lz, 32 @@ -434,6 +535,15 @@ define i32 @ctlz_undef(i32 %Value) { ret i32 %ctlz } +define <2 x i32> @ctlz_undef_vec(<2 x i32> %Value) { +; CHECK-LABEL: @ctlz_undef_vec( +; CHECK-NEXT: [[CTLZ:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> zeroinitializer, i1 true) +; CHECK-NEXT: ret <2 x i32> [[CTLZ]] +; + %ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> zeroinitializer, i1 true) + ret <2 x i32> %ctlz +} + define i32 @ctlz_make_undef(i32 %a) { %or = or i32 %a, 8 %ctlz = tail call i32 @llvm.ctlz.i32(i32 %or, i1 false) @@ -444,13 +554,32 @@ define i32 @ctlz_make_undef(i32 %a) { ; CHECK-NEXT: ret i32 %ctlz } +define <2 x i32> @ctlz_make_undef_vec(<2 x i32> %a) { +; CHECK-LABEL: @ctlz_make_undef_vec( +; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[A:%.*]], <i32 8, i32 8> +; CHECK-NEXT: [[CTLZ:%.*]] = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x
i32> [[OR]], i1 false) +; CHECK-NEXT: ret <2 x i32> [[CTLZ]] +; + %or = or <2 x i32> %a, <i32 8, i32 8> + %ctlz = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %or, i1 false) + ret <2 x i32> %ctlz +} + define i32 @cttz_undef(i32 %Value) nounwind { ; CHECK-LABEL: @cttz_undef( ; CHECK-NEXT: ret i32 undef ; %cttz = call i32 @llvm.cttz.i32(i32 0, i1 true) ret i32 %cttz +} +define <2 x i32> @cttz_undef_vec(<2 x i32> %Value) nounwind { +; CHECK-LABEL: @cttz_undef_vec( +; CHECK-NEXT: [[CTTZ:%.*]] = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> zeroinitializer, i1 true) +; CHECK-NEXT: ret <2 x i32> [[CTTZ]] +; + %cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> zeroinitializer, i1 true) + ret <2 x i32> %cttz } define i32 @cttz_make_undef(i32 %a) { @@ -463,6 +592,17 @@ define i32 @cttz_make_undef(i32 %a) { ; CHECK-NEXT: ret i32 %cttz } +define <2 x i32> @cttz_make_undef_vec(<2 x i32> %a) { +; CHECK-LABEL: @cttz_make_undef_vec( +; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[A:%.*]], <i32 8, i32 8> +; CHECK-NEXT: [[CTTZ:%.*]] = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[OR]], i1 false) +; CHECK-NEXT: ret <2 x i32> [[CTTZ]] +; + %or = or <2 x i32> %a, <i32 8, i32 8> + %cttz = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %or, i1 false) + ret <2 x i32> %cttz +} + define i32 @ctlz_select(i32 %Value) nounwind { ; CHECK-LABEL: @ctlz_select( ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.ctlz.i32(i32 %Value, i1 false) @@ -472,7 +612,17 @@ define i32 @ctlz_select(i32 %Value) nounwind { %ctlz = call i32 @llvm.ctlz.i32(i32 %Value, i1 true) %s = select i1 %tobool, i32 %ctlz, i32 32 ret i32 %s +} +define <2 x i32> @ctlz_select_vec(<2 x i32> %Value) nounwind { +; CHECK-LABEL: @ctlz_select_vec( +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[VALUE:%.*]], i1 false) +; CHECK-NEXT: ret <2 x i32> [[TMP1]] +; + %tobool = icmp ne <2 x i32> %Value, zeroinitializer + %ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %Value, i1 true) + %s = select <2 x i1> %tobool, <2 x i32> %ctlz, <2 x i32> <i32 32, i32 32> + ret <2 x i32> %s } define
i32 @cttz_select(i32 %Value) nounwind { @@ -484,7 +634,17 @@ define i32 @cttz_select(i32 %Value) nounwind { %cttz = call i32 @llvm.cttz.i32(i32 %Value, i1 true) %s = select i1 %tobool, i32 %cttz, i32 32 ret i32 %s +} +define <2 x i32> @cttz_select_vec(<2 x i32> %Value) nounwind { +; CHECK-LABEL: @cttz_select_vec( +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[VALUE:%.*]], i1 false) +; CHECK-NEXT: ret <2 x i32> [[TMP1]] +; + %tobool = icmp ne <2 x i32> %Value, zeroinitializer + %cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %Value, i1 true) + %s = select <2 x i1> %tobool, <2 x i32> %cttz, <2 x i32> <i32 32, i32 32> + ret <2 x i32> %s } define i1 @overflow_div_add(i32 %v1, i32 %v2) nounwind { diff --git a/test/Transforms/InstSimplify/call.ll b/test/Transforms/InstSimplify/call.ll index 68daac65ee6..ee8f35aa7cd 100644 --- a/test/Transforms/InstSimplify/call.ll +++ b/test/Transforms/InstSimplify/call.ll @@ -199,6 +199,17 @@ define i256 @test_cttz() { ret i256 %x } +declare <2 x i256> @llvm.cttz.v2i256(<2 x i256> %src, i1 %is_zero_undef) + +define <2 x i256> @test_cttz_vec() { +; CHECK-LABEL: @test_cttz_vec( +; CHECK-NEXT: [[X:%.*]] = call <2 x i256> @llvm.cttz.v2i256(<2 x i256> <i256 10, i256 10>, i1 false) +; CHECK-NEXT: ret <2 x i256> [[X]] +; + %x = call <2 x i256> @llvm.cttz.v2i256(<2 x i256> <i256 10, i256 10>, i1 false) + ret <2 x i256> %x +} + declare i256 @llvm.ctpop.i256(i256 %src) define i256 @test_ctpop() {