From 8cc9fd185859de3c81111136a8be43fc5da2a2ea Mon Sep 17 00:00:00 2001 From: Konstantin Zhuravlyov Date: Thu, 6 Oct 2016 02:20:46 +0000 Subject: [PATCH] [AMDGPU] Promote uniform i16 bitreverse intrinsic to i32 Differential Revision: https://reviews.llvm.org/D25121 llvm-svn: 283415 --- lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp | 76 +- .../amdgpu-codegenprepare-i16-to-i32.ll | 1016 ++++++++++------- 2 files changed, 665 insertions(+), 427 deletions(-) diff --git a/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp index 88b1be2e6b8..2378465a231 100644 --- a/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp +++ b/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp @@ -77,14 +77,14 @@ class AMDGPUCodeGenPrepare : public FunctionPass, /// /// \returns True if 16 bit binary operation is promoted to equivalent 32 bit /// binary operation, false otherwise. - bool promoteUniformI16OpToI32Op(BinaryOperator &I) const; + bool promoteUniformI16OpToI32(BinaryOperator &I) const; /// \brief Promotes uniform 16 bit 'icmp' operation \p I to 32 bit 'icmp' /// operation by sign or zero extending operands to 32 bits, and replacing 16 /// bit operation with 32 bit operation. /// /// \returns True. - bool promoteUniformI16OpToI32Op(ICmpInst &I) const; + bool promoteUniformI16OpToI32(ICmpInst &I) const; /// \brief Promotes uniform 16 bit 'select' operation \p I to 32 bit 'select' /// operation by sign or zero extending operands to 32 bits, replacing 16 bit @@ -92,7 +92,16 @@ class AMDGPUCodeGenPrepare : public FunctionPass, /// operation back to 16 bits. /// /// \returns True. 
- bool promoteUniformI16OpToI32Op(SelectInst &I) const; + bool promoteUniformI16OpToI32(SelectInst &I) const; + + /// \brief Promotes uniform 16 bit 'bitreverse' intrinsic \p I to 32 bit + /// 'bitreverse' intrinsic by zero extending operand to 32 bits, replacing 16 + /// bit intrinsic with 32 bit intrinsic, shifting the result of 32 bit + /// intrinsic 16 bits to the right with zero fill, and truncating the result + /// of shift operation back to 16 bits. + /// + /// \returns True. + bool promoteUniformI16BitreverseIntrinsicToI32(IntrinsicInst &I) const; public: static char ID; @@ -111,6 +120,9 @@ public: bool visitICmpInst(ICmpInst &I); bool visitSelectInst(SelectInst &I); + bool visitIntrinsicInst(IntrinsicInst &I); + bool visitBitreverseIntrinsicInst(IntrinsicInst &I); + bool doInitialization(Module &M) override; bool runOnFunction(Function &F) override; @@ -181,8 +193,8 @@ bool AMDGPUCodeGenPrepare::isSigned(const SelectInst &I) const { cast<ICmpInst>(I.getOperand(0))->isSigned() : false; } -bool AMDGPUCodeGenPrepare::promoteUniformI16OpToI32Op(BinaryOperator &I) const { - assert(isI16Ty(I.getType()) && "Op must be 16 bits"); +bool AMDGPUCodeGenPrepare::promoteUniformI16OpToI32(BinaryOperator &I) const { + assert(isI16Ty(I.getType()) && "I must be 16 bits"); if (I.getOpcode() == Instruction::SDiv || I.getOpcode() == Instruction::UDiv) return false; @@ -212,7 +224,7 @@ bool AMDGPUCodeGenPrepare::promoteUniformI16OpToI32Op(BinaryOperator &I) const { return true; } -bool AMDGPUCodeGenPrepare::promoteUniformI16OpToI32Op(ICmpInst &I) const { +bool AMDGPUCodeGenPrepare::promoteUniformI16OpToI32(ICmpInst &I) const { assert(isI16Ty(I.getOperand(0)->getType()) && "Op0 must be 16 bits"); assert(isI16Ty(I.getOperand(1)->getType()) && "Op1 must be 16 bits"); @@ -240,8 +252,8 @@ bool AMDGPUCodeGenPrepare::promoteUniformI16OpToI32Op(ICmpInst &I) const { return true; } -bool AMDGPUCodeGenPrepare::promoteUniformI16OpToI32Op(SelectInst &I) const { - assert(isI16Ty(I.getType()) && "Op 
must be 16 bits"); +bool AMDGPUCodeGenPrepare::promoteUniformI16OpToI32(SelectInst &I) const { + assert(isI16Ty(I.getType()) && "I must be 16 bits"); IRBuilder<> Builder(&I); Builder.SetCurrentDebugLocation(I.getDebugLoc()); @@ -268,6 +280,29 @@ bool AMDGPUCodeGenPrepare::promoteUniformI16OpToI32Op(SelectInst &I) const { return true; } +bool AMDGPUCodeGenPrepare::promoteUniformI16BitreverseIntrinsicToI32( + IntrinsicInst &I) const { + assert(I.getIntrinsicID() == Intrinsic::bitreverse && "I must be bitreverse"); + assert(isI16Ty(I.getType()) && "I must be 16 bits"); + + IRBuilder<> Builder(&I); + Builder.SetCurrentDebugLocation(I.getDebugLoc()); + + Type *I32Ty = getI32Ty(Builder, I.getType()); + Function *I32 = + Intrinsic::getDeclaration(Mod, Intrinsic::bitreverse, { I32Ty }); + Value *ExtOp = Builder.CreateZExt(I.getOperand(0), I32Ty); + Value *ExtRes = Builder.CreateCall(I32, { ExtOp }); + Value *LShrOp = Builder.CreateLShr(ExtRes, 16); + Value *TruncRes = + Builder.CreateTrunc(LShrOp, getI16Ty(Builder, ExtRes->getType())); + + I.replaceAllUsesWith(TruncRes); + I.eraseFromParent(); + + return true; +} + static bool shouldKeepFDivF32(Value *Num, bool UnsafeDiv) { const ConstantFP *CNum = dyn_cast<ConstantFP>(Num); if (!CNum) @@ -357,7 +392,7 @@ bool AMDGPUCodeGenPrepare::visitBinaryOperator(BinaryOperator &I) { // TODO: Should we promote smaller types that will be legalized to i16? if (ST->has16BitInsts() && isI16Ty(I.getType()) && DA->isUniform(&I)) - Changed |= promoteUniformI16OpToI32Op(I); + Changed |= promoteUniformI16OpToI32(I); return Changed; } @@ -368,7 +403,7 @@ bool AMDGPUCodeGenPrepare::visitICmpInst(ICmpInst &I) { // TODO: Should we promote smaller types that will be legalized to i16? 
if (ST->has16BitInsts() && isI16Ty(I.getOperand(0)->getType()) && isI16Ty(I.getOperand(1)->getType()) && DA->isUniform(&I)) - Changed |= promoteUniformI16OpToI32Op(I); + Changed |= promoteUniformI16OpToI32(I); return Changed; } @@ -378,7 +413,26 @@ bool AMDGPUCodeGenPrepare::visitSelectInst(SelectInst &I) { // TODO: Should we promote smaller types that will be legalized to i16? if (ST->has16BitInsts() && isI16Ty(I.getType()) && DA->isUniform(&I)) - Changed |= promoteUniformI16OpToI32Op(I); + Changed |= promoteUniformI16OpToI32(I); + + return Changed; +} + +bool AMDGPUCodeGenPrepare::visitIntrinsicInst(IntrinsicInst &I) { + switch (I.getIntrinsicID()) { + case Intrinsic::bitreverse: + return visitBitreverseIntrinsicInst(I); + default: + return false; + } +} + +bool AMDGPUCodeGenPrepare::visitBitreverseIntrinsicInst(IntrinsicInst &I) { + bool Changed = false; + + // TODO: Should we promote smaller types that will be legalized to i16? + if (ST->has16BitInsts() && isI16Ty(I.getType()) && DA->isUniform(&I)) + Changed |= promoteUniformI16BitreverseIntrinsicToI32(I); return Changed; } diff --git a/test/CodeGen/AMDGPU/amdgpu-codegenprepare-i16-to-i32.ll b/test/CodeGen/AMDGPU/amdgpu-codegenprepare-i16-to-i32.ll index a4a882ed844..965e0c08255 100644 --- a/test/CodeGen/AMDGPU/amdgpu-codegenprepare-i16-to-i32.ll +++ b/test/CodeGen/AMDGPU/amdgpu-codegenprepare-i16-to-i32.ll @@ -1,856 +1,1040 @@ -; RUN: opt -S -mtriple=amdgcn-- -amdgpu-codegenprepare %s | FileCheck -check-prefix=SI %s -; RUN: opt -S -mtriple=amdgcn-- -mcpu=tonga -amdgpu-codegenprepare %s | FileCheck -check-prefix=VI %s +; RUN: opt -S -mtriple=amdgcn-- -amdgpu-codegenprepare %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: opt -S -mtriple=amdgcn-- -mcpu=tonga -amdgpu-codegenprepare %s | FileCheck -check-prefix=GCN -check-prefix=VI %s -; SI-NOT: zext -; SI-NOT: sext -; SI-NOT: trunc - -; VI-LABEL: @add_i16( +; GCN-LABEL: @add_i16( +; SI: %r = add i16 %a, %b +; SI-NEXT: ret i16 %r ; VI: %[[A_32:[0-9]+]] 
= zext i16 %a to i32 -; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32 -; VI: %[[R_32:[0-9]+]] = add i32 %[[A_32]], %[[B_32]] -; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 -; VI: ret i16 %[[R_16]] +; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32 +; VI-NEXT: %[[R_32:[0-9]+]] = add i32 %[[A_32]], %[[B_32]] +; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 +; VI-NEXT: ret i16 %[[R_16]] define i16 @add_i16(i16 %a, i16 %b) { %r = add i16 %a, %b ret i16 %r } -; VI-LABEL: @add_nsw_i16( +; GCN-LABEL: @add_nsw_i16( +; SI: %r = add nsw i16 %a, %b +; SI-NEXT: ret i16 %r ; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 -; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32 -; VI: %[[R_32:[0-9]+]] = add nsw i32 %[[A_32]], %[[B_32]] -; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 -; VI: ret i16 %[[R_16]] +; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32 +; VI-NEXT: %[[R_32:[0-9]+]] = add nsw i32 %[[A_32]], %[[B_32]] +; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 +; VI-NEXT: ret i16 %[[R_16]] define i16 @add_nsw_i16(i16 %a, i16 %b) { %r = add nsw i16 %a, %b ret i16 %r } -; VI-LABEL: @add_nuw_i16( +; GCN-LABEL: @add_nuw_i16( +; SI: %r = add nuw i16 %a, %b +; SI-NEXT: ret i16 %r ; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 -; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32 -; VI: %[[R_32:[0-9]+]] = add nuw i32 %[[A_32]], %[[B_32]] -; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 -; VI: ret i16 %[[R_16]] +; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32 +; VI-NEXT: %[[R_32:[0-9]+]] = add nuw i32 %[[A_32]], %[[B_32]] +; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 +; VI-NEXT: ret i16 %[[R_16]] define i16 @add_nuw_i16(i16 %a, i16 %b) { %r = add nuw i16 %a, %b ret i16 %r } -; VI-LABEL: @add_nuw_nsw_i16( +; GCN-LABEL: @add_nuw_nsw_i16( +; SI: %r = add nuw nsw i16 %a, %b +; SI-NEXT: ret i16 %r ; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 -; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32 -; VI: %[[R_32:[0-9]+]] = add nuw nsw i32 %[[A_32]], %[[B_32]] -; VI: %[[R_16:[0-9]+]] = 
trunc i32 %[[R_32]] to i16 -; VI: ret i16 %[[R_16]] +; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32 +; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw i32 %[[A_32]], %[[B_32]] +; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 +; VI-NEXT: ret i16 %[[R_16]] define i16 @add_nuw_nsw_i16(i16 %a, i16 %b) { %r = add nuw nsw i16 %a, %b ret i16 %r } -; VI-LABEL: @sub_i16( +; GCN-LABEL: @sub_i16( +; SI: %r = sub i16 %a, %b +; SI-NEXT: ret i16 %r ; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 -; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32 -; VI: %[[R_32:[0-9]+]] = sub i32 %[[A_32]], %[[B_32]] -; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 -; VI: ret i16 %[[R_16]] +; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32 +; VI-NEXT: %[[R_32:[0-9]+]] = sub i32 %[[A_32]], %[[B_32]] +; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 +; VI-NEXT: ret i16 %[[R_16]] define i16 @sub_i16(i16 %a, i16 %b) { %r = sub i16 %a, %b ret i16 %r } -; VI-LABEL: @sub_nsw_i16( +; GCN-LABEL: @sub_nsw_i16( +; SI: %r = sub nsw i16 %a, %b +; SI-NEXT: ret i16 %r ; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 -; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32 -; VI: %[[R_32:[0-9]+]] = sub nsw i32 %[[A_32]], %[[B_32]] -; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 -; VI: ret i16 %[[R_16]] +; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32 +; VI-NEXT: %[[R_32:[0-9]+]] = sub nsw i32 %[[A_32]], %[[B_32]] +; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 +; VI-NEXT: ret i16 %[[R_16]] define i16 @sub_nsw_i16(i16 %a, i16 %b) { %r = sub nsw i16 %a, %b ret i16 %r } -; VI-LABEL: @sub_nuw_i16( +; GCN-LABEL: @sub_nuw_i16( +; SI: %r = sub nuw i16 %a, %b +; SI-NEXT: ret i16 %r ; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 -; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32 -; VI: %[[R_32:[0-9]+]] = sub nuw i32 %[[A_32]], %[[B_32]] -; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 -; VI: ret i16 %[[R_16]] +; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32 +; VI-NEXT: %[[R_32:[0-9]+]] = sub nuw i32 %[[A_32]], %[[B_32]] 
+; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 +; VI-NEXT: ret i16 %[[R_16]] define i16 @sub_nuw_i16(i16 %a, i16 %b) { %r = sub nuw i16 %a, %b ret i16 %r } -; VI-LABEL: @sub_nuw_nsw_i16( +; GCN-LABEL: @sub_nuw_nsw_i16( +; SI: %r = sub nuw nsw i16 %a, %b +; SI-NEXT: ret i16 %r ; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 -; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32 -; VI: %[[R_32:[0-9]+]] = sub nuw nsw i32 %[[A_32]], %[[B_32]] -; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 -; VI: ret i16 %[[R_16]] +; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32 +; VI-NEXT: %[[R_32:[0-9]+]] = sub nuw nsw i32 %[[A_32]], %[[B_32]] +; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 +; VI-NEXT: ret i16 %[[R_16]] define i16 @sub_nuw_nsw_i16(i16 %a, i16 %b) { %r = sub nuw nsw i16 %a, %b ret i16 %r } -; VI-LABEL: @mul_i16( +; GCN-LABEL: @mul_i16( +; SI: %r = mul i16 %a, %b +; SI-NEXT: ret i16 %r ; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 -; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32 -; VI: %[[R_32:[0-9]+]] = mul i32 %[[A_32]], %[[B_32]] -; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 -; VI: ret i16 %[[R_16]] +; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32 +; VI-NEXT: %[[R_32:[0-9]+]] = mul i32 %[[A_32]], %[[B_32]] +; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 +; VI-NEXT: ret i16 %[[R_16]] define i16 @mul_i16(i16 %a, i16 %b) { %r = mul i16 %a, %b ret i16 %r } -; VI-LABEL: @mul_nsw_i16( +; GCN-LABEL: @mul_nsw_i16( +; SI: %r = mul nsw i16 %a, %b +; SI-NEXT: ret i16 %r ; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 -; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32 -; VI: %[[R_32:[0-9]+]] = mul nsw i32 %[[A_32]], %[[B_32]] -; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 -; VI: ret i16 %[[R_16]] +; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32 +; VI-NEXT: %[[R_32:[0-9]+]] = mul nsw i32 %[[A_32]], %[[B_32]] +; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 +; VI-NEXT: ret i16 %[[R_16]] define i16 @mul_nsw_i16(i16 %a, i16 %b) { %r = mul nsw i16 %a, 
%b ret i16 %r } -; VI-LABEL: @mul_nuw_i16( +; GCN-LABEL: @mul_nuw_i16( +; SI: %r = mul nuw i16 %a, %b +; SI-NEXT: ret i16 %r ; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 -; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32 -; VI: %[[R_32:[0-9]+]] = mul nuw i32 %[[A_32]], %[[B_32]] -; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 -; VI: ret i16 %[[R_16]] +; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32 +; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw i32 %[[A_32]], %[[B_32]] +; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 +; VI-NEXT: ret i16 %[[R_16]] define i16 @mul_nuw_i16(i16 %a, i16 %b) { %r = mul nuw i16 %a, %b ret i16 %r } -; VI-LABEL: @mul_nuw_nsw_i16( +; GCN-LABEL: @mul_nuw_nsw_i16( +; SI: %r = mul nuw nsw i16 %a, %b +; SI-NEXT: ret i16 %r ; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 -; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32 -; VI: %[[R_32:[0-9]+]] = mul nuw nsw i32 %[[A_32]], %[[B_32]] -; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 -; VI: ret i16 %[[R_16]] +; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32 +; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw nsw i32 %[[A_32]], %[[B_32]] +; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 +; VI-NEXT: ret i16 %[[R_16]] define i16 @mul_nuw_nsw_i16(i16 %a, i16 %b) { %r = mul nuw nsw i16 %a, %b ret i16 %r } -; VI-LABEL: @urem_i16( +; GCN-LABEL: @urem_i16( +; SI: %r = urem i16 %a, %b +; SI-NEXT: ret i16 %r ; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 -; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32 -; VI: %[[R_32:[0-9]+]] = urem i32 %[[A_32]], %[[B_32]] -; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 -; VI: ret i16 %[[R_16]] +; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32 +; VI-NEXT: %[[R_32:[0-9]+]] = urem i32 %[[A_32]], %[[B_32]] +; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 +; VI-NEXT: ret i16 %[[R_16]] define i16 @urem_i16(i16 %a, i16 %b) { %r = urem i16 %a, %b ret i16 %r } -; VI-LABEL: @srem_i16( +; GCN-LABEL: @srem_i16( +; SI: %r = srem i16 %a, %b +; SI-NEXT: ret i16 %r ; VI: %[[A_32:[0-9]+]] = 
sext i16 %a to i32 -; VI: %[[B_32:[0-9]+]] = sext i16 %b to i32 -; VI: %[[R_32:[0-9]+]] = srem i32 %[[A_32]], %[[B_32]] -; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 -; VI: ret i16 %[[R_16]] +; VI-NEXT: %[[B_32:[0-9]+]] = sext i16 %b to i32 +; VI-NEXT: %[[R_32:[0-9]+]] = srem i32 %[[A_32]], %[[B_32]] +; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 +; VI-NEXT: ret i16 %[[R_16]] define i16 @srem_i16(i16 %a, i16 %b) { %r = srem i16 %a, %b ret i16 %r } -; VI-LABEL: @shl_i16( +; GCN-LABEL: @shl_i16( +; SI: %r = shl i16 %a, %b +; SI-NEXT: ret i16 %r ; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 -; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32 -; VI: %[[R_32:[0-9]+]] = shl i32 %[[A_32]], %[[B_32]] -; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 -; VI: ret i16 %[[R_16]] +; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32 +; VI-NEXT: %[[R_32:[0-9]+]] = shl i32 %[[A_32]], %[[B_32]] +; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 +; VI-NEXT: ret i16 %[[R_16]] define i16 @shl_i16(i16 %a, i16 %b) { %r = shl i16 %a, %b ret i16 %r } -; VI-LABEL: @shl_nsw_i16( +; GCN-LABEL: @shl_nsw_i16( +; SI: %r = shl nsw i16 %a, %b +; SI-NEXT: ret i16 %r ; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 -; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32 -; VI: %[[R_32:[0-9]+]] = shl nsw i32 %[[A_32]], %[[B_32]] -; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 -; VI: ret i16 %[[R_16]] +; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32 +; VI-NEXT: %[[R_32:[0-9]+]] = shl nsw i32 %[[A_32]], %[[B_32]] +; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 +; VI-NEXT: ret i16 %[[R_16]] define i16 @shl_nsw_i16(i16 %a, i16 %b) { %r = shl nsw i16 %a, %b ret i16 %r } -; VI-LABEL: @shl_nuw_i16( +; GCN-LABEL: @shl_nuw_i16( +; SI: %r = shl nuw i16 %a, %b +; SI-NEXT: ret i16 %r ; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 -; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32 -; VI: %[[R_32:[0-9]+]] = shl nuw i32 %[[A_32]], %[[B_32]] -; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 -; VI: ret i16 
%[[R_16]] +; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32 +; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw i32 %[[A_32]], %[[B_32]] +; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 +; VI-NEXT: ret i16 %[[R_16]] define i16 @shl_nuw_i16(i16 %a, i16 %b) { %r = shl nuw i16 %a, %b ret i16 %r } -; VI-LABEL: @shl_nuw_nsw_i16( +; GCN-LABEL: @shl_nuw_nsw_i16( +; SI: %r = shl nuw nsw i16 %a, %b +; SI-NEXT: ret i16 %r ; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 -; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32 -; VI: %[[R_32:[0-9]+]] = shl nuw nsw i32 %[[A_32]], %[[B_32]] -; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 -; VI: ret i16 %[[R_16]] +; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32 +; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw i32 %[[A_32]], %[[B_32]] +; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 +; VI-NEXT: ret i16 %[[R_16]] define i16 @shl_nuw_nsw_i16(i16 %a, i16 %b) { %r = shl nuw nsw i16 %a, %b ret i16 %r } -; VI-LABEL: @lshr_i16( +; GCN-LABEL: @lshr_i16( +; SI: %r = lshr i16 %a, %b +; SI-NEXT: ret i16 %r ; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 -; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32 -; VI: %[[R_32:[0-9]+]] = lshr i32 %[[A_32]], %[[B_32]] -; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 -; VI: ret i16 %[[R_16]] +; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32 +; VI-NEXT: %[[R_32:[0-9]+]] = lshr i32 %[[A_32]], %[[B_32]] +; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 +; VI-NEXT: ret i16 %[[R_16]] define i16 @lshr_i16(i16 %a, i16 %b) { %r = lshr i16 %a, %b ret i16 %r } -; VI-LABEL: @lshr_exact_i16( +; GCN-LABEL: @lshr_exact_i16( +; SI: %r = lshr exact i16 %a, %b +; SI-NEXT: ret i16 %r ; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 -; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32 -; VI: %[[R_32:[0-9]+]] = lshr exact i32 %[[A_32]], %[[B_32]] -; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 -; VI: ret i16 %[[R_16]] +; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32 +; VI-NEXT: %[[R_32:[0-9]+]] = lshr exact i32 %[[A_32]], %[[B_32]] +; 
VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 +; VI-NEXT: ret i16 %[[R_16]] define i16 @lshr_exact_i16(i16 %a, i16 %b) { %r = lshr exact i16 %a, %b ret i16 %r } -; VI-LABEL: @ashr_i16( +; GCN-LABEL: @ashr_i16( +; SI: %r = ashr i16 %a, %b +; SI-NEXT: ret i16 %r ; VI: %[[A_32:[0-9]+]] = sext i16 %a to i32 -; VI: %[[B_32:[0-9]+]] = sext i16 %b to i32 -; VI: %[[R_32:[0-9]+]] = ashr i32 %[[A_32]], %[[B_32]] -; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 -; VI: ret i16 %[[R_16]] +; VI-NEXT: %[[B_32:[0-9]+]] = sext i16 %b to i32 +; VI-NEXT: %[[R_32:[0-9]+]] = ashr i32 %[[A_32]], %[[B_32]] +; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 +; VI-NEXT: ret i16 %[[R_16]] define i16 @ashr_i16(i16 %a, i16 %b) { %r = ashr i16 %a, %b ret i16 %r } -; VI-LABEL: @ashr_exact_i16( +; GCN-LABEL: @ashr_exact_i16( +; SI: %r = ashr exact i16 %a, %b +; SI-NEXT: ret i16 %r ; VI: %[[A_32:[0-9]+]] = sext i16 %a to i32 -; VI: %[[B_32:[0-9]+]] = sext i16 %b to i32 -; VI: %[[R_32:[0-9]+]] = ashr exact i32 %[[A_32]], %[[B_32]] -; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 -; VI: ret i16 %[[R_16]] +; VI-NEXT: %[[B_32:[0-9]+]] = sext i16 %b to i32 +; VI-NEXT: %[[R_32:[0-9]+]] = ashr exact i32 %[[A_32]], %[[B_32]] +; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 +; VI-NEXT: ret i16 %[[R_16]] define i16 @ashr_exact_i16(i16 %a, i16 %b) { %r = ashr exact i16 %a, %b ret i16 %r } -; VI-LABEL: @and_i16( +; GCN-LABEL: @and_i16( +; SI: %r = and i16 %a, %b +; SI-NEXT: ret i16 %r ; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 -; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32 -; VI: %[[R_32:[0-9]+]] = and i32 %[[A_32]], %[[B_32]] -; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 -; VI: ret i16 %[[R_16]] +; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32 +; VI-NEXT: %[[R_32:[0-9]+]] = and i32 %[[A_32]], %[[B_32]] +; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 +; VI-NEXT: ret i16 %[[R_16]] define i16 @and_i16(i16 %a, i16 %b) { %r = and i16 %a, %b ret i16 %r } -; 
VI-LABEL: @or_i16( +; GCN-LABEL: @or_i16( +; SI: %r = or i16 %a, %b +; SI-NEXT: ret i16 %r ; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 -; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32 -; VI: %[[R_32:[0-9]+]] = or i32 %[[A_32]], %[[B_32]] -; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 -; VI: ret i16 %[[R_16]] +; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32 +; VI-NEXT: %[[R_32:[0-9]+]] = or i32 %[[A_32]], %[[B_32]] +; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 +; VI-NEXT: ret i16 %[[R_16]] define i16 @or_i16(i16 %a, i16 %b) { %r = or i16 %a, %b ret i16 %r } -; VI-LABEL: @xor_i16( +; GCN-LABEL: @xor_i16( +; SI: %r = xor i16 %a, %b +; SI-NEXT: ret i16 %r ; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 -; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32 -; VI: %[[R_32:[0-9]+]] = xor i32 %[[A_32]], %[[B_32]] -; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 -; VI: ret i16 %[[R_16]] +; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32 +; VI-NEXT: %[[R_32:[0-9]+]] = xor i32 %[[A_32]], %[[B_32]] +; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 +; VI-NEXT: ret i16 %[[R_16]] define i16 @xor_i16(i16 %a, i16 %b) { %r = xor i16 %a, %b ret i16 %r } -; VI-LABEL: @select_eq_i16( +; GCN-LABEL: @select_eq_i16( +; SI: %cmp = icmp eq i16 %a, %b +; SI-NEXT: %sel = select i1 %cmp, i16 %a, i16 %b +; SI-NEXT: ret i16 %sel ; VI: %[[A_32_0:[0-9]+]] = zext i16 %a to i32 -; VI: %[[B_32_0:[0-9]+]] = zext i16 %b to i32 -; VI: %[[CMP:[0-9]+]] = icmp eq i32 %[[A_32_0]], %[[B_32_0]] -; VI: %[[A_32_1:[0-9]+]] = zext i16 %a to i32 -; VI: %[[B_32_1:[0-9]+]] = zext i16 %b to i32 -; VI: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]] -; VI: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16 -; VI: ret i16 %[[SEL_16]] +; VI-NEXT: %[[B_32_0:[0-9]+]] = zext i16 %b to i32 +; VI-NEXT: %[[CMP:[0-9]+]] = icmp eq i32 %[[A_32_0]], %[[B_32_0]] +; VI-NEXT: %[[A_32_1:[0-9]+]] = zext i16 %a to i32 +; VI-NEXT: %[[B_32_1:[0-9]+]] = zext i16 %b to i32 +; VI-NEXT: 
%[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]] +; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16 +; VI-NEXT: ret i16 %[[SEL_16]] define i16 @select_eq_i16(i16 %a, i16 %b) { %cmp = icmp eq i16 %a, %b %sel = select i1 %cmp, i16 %a, i16 %b ret i16 %sel } -; VI-LABEL: @select_ne_i16( +; GCN-LABEL: @select_ne_i16( +; SI: %cmp = icmp ne i16 %a, %b +; SI-NEXT: %sel = select i1 %cmp, i16 %a, i16 %b +; SI-NEXT: ret i16 %sel ; VI: %[[A_32_0:[0-9]+]] = zext i16 %a to i32 -; VI: %[[B_32_0:[0-9]+]] = zext i16 %b to i32 -; VI: %[[CMP:[0-9]+]] = icmp ne i32 %[[A_32_0]], %[[B_32_0]] -; VI: %[[A_32_1:[0-9]+]] = zext i16 %a to i32 -; VI: %[[B_32_1:[0-9]+]] = zext i16 %b to i32 -; VI: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]] -; VI: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16 -; VI: ret i16 %[[SEL_16]] +; VI-NEXT: %[[B_32_0:[0-9]+]] = zext i16 %b to i32 +; VI-NEXT: %[[CMP:[0-9]+]] = icmp ne i32 %[[A_32_0]], %[[B_32_0]] +; VI-NEXT: %[[A_32_1:[0-9]+]] = zext i16 %a to i32 +; VI-NEXT: %[[B_32_1:[0-9]+]] = zext i16 %b to i32 +; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]] +; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16 +; VI-NEXT: ret i16 %[[SEL_16]] define i16 @select_ne_i16(i16 %a, i16 %b) { %cmp = icmp ne i16 %a, %b %sel = select i1 %cmp, i16 %a, i16 %b ret i16 %sel } -; VI-LABEL: @select_ugt_i16( +; GCN-LABEL: @select_ugt_i16( +; SI: %cmp = icmp ugt i16 %a, %b +; SI-NEXT: %sel = select i1 %cmp, i16 %a, i16 %b +; SI-NEXT: ret i16 %sel ; VI: %[[A_32_0:[0-9]+]] = zext i16 %a to i32 -; VI: %[[B_32_0:[0-9]+]] = zext i16 %b to i32 -; VI: %[[CMP:[0-9]+]] = icmp ugt i32 %[[A_32_0]], %[[B_32_0]] -; VI: %[[A_32_1:[0-9]+]] = zext i16 %a to i32 -; VI: %[[B_32_1:[0-9]+]] = zext i16 %b to i32 -; VI: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]] -; VI: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16 -; VI: ret i16 %[[SEL_16]] +; VI-NEXT: 
%[[B_32_0:[0-9]+]] = zext i16 %b to i32 +; VI-NEXT: %[[CMP:[0-9]+]] = icmp ugt i32 %[[A_32_0]], %[[B_32_0]] +; VI-NEXT: %[[A_32_1:[0-9]+]] = zext i16 %a to i32 +; VI-NEXT: %[[B_32_1:[0-9]+]] = zext i16 %b to i32 +; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]] +; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16 +; VI-NEXT: ret i16 %[[SEL_16]] define i16 @select_ugt_i16(i16 %a, i16 %b) { %cmp = icmp ugt i16 %a, %b %sel = select i1 %cmp, i16 %a, i16 %b ret i16 %sel } -; VI-LABEL: @select_uge_i16( +; GCN-LABEL: @select_uge_i16( +; SI: %cmp = icmp uge i16 %a, %b +; SI-NEXT: %sel = select i1 %cmp, i16 %a, i16 %b +; SI-NEXT: ret i16 %sel ; VI: %[[A_32_0:[0-9]+]] = zext i16 %a to i32 -; VI: %[[B_32_0:[0-9]+]] = zext i16 %b to i32 -; VI: %[[CMP:[0-9]+]] = icmp uge i32 %[[A_32_0]], %[[B_32_0]] -; VI: %[[A_32_1:[0-9]+]] = zext i16 %a to i32 -; VI: %[[B_32_1:[0-9]+]] = zext i16 %b to i32 -; VI: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]] -; VI: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16 -; VI: ret i16 %[[SEL_16]] +; VI-NEXT: %[[B_32_0:[0-9]+]] = zext i16 %b to i32 +; VI-NEXT: %[[CMP:[0-9]+]] = icmp uge i32 %[[A_32_0]], %[[B_32_0]] +; VI-NEXT: %[[A_32_1:[0-9]+]] = zext i16 %a to i32 +; VI-NEXT: %[[B_32_1:[0-9]+]] = zext i16 %b to i32 +; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]] +; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16 +; VI-NEXT: ret i16 %[[SEL_16]] define i16 @select_uge_i16(i16 %a, i16 %b) { %cmp = icmp uge i16 %a, %b %sel = select i1 %cmp, i16 %a, i16 %b ret i16 %sel } -; VI-LABEL: @select_ult_i16( +; GCN-LABEL: @select_ult_i16( +; SI: %cmp = icmp ult i16 %a, %b +; SI-NEXT: %sel = select i1 %cmp, i16 %a, i16 %b +; SI-NEXT: ret i16 %sel ; VI: %[[A_32_0:[0-9]+]] = zext i16 %a to i32 -; VI: %[[B_32_0:[0-9]+]] = zext i16 %b to i32 -; VI: %[[CMP:[0-9]+]] = icmp ult i32 %[[A_32_0]], %[[B_32_0]] -; VI: %[[A_32_1:[0-9]+]] = zext i16 %a to 
i32 -; VI: %[[B_32_1:[0-9]+]] = zext i16 %b to i32 -; VI: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]] -; VI: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16 -; VI: ret i16 %[[SEL_16]] +; VI-NEXT: %[[B_32_0:[0-9]+]] = zext i16 %b to i32 +; VI-NEXT: %[[CMP:[0-9]+]] = icmp ult i32 %[[A_32_0]], %[[B_32_0]] +; VI-NEXT: %[[A_32_1:[0-9]+]] = zext i16 %a to i32 +; VI-NEXT: %[[B_32_1:[0-9]+]] = zext i16 %b to i32 +; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]] +; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16 +; VI-NEXT: ret i16 %[[SEL_16]] define i16 @select_ult_i16(i16 %a, i16 %b) { %cmp = icmp ult i16 %a, %b %sel = select i1 %cmp, i16 %a, i16 %b ret i16 %sel } -; VI-LABEL: @select_ule_i16( +; GCN-LABEL: @select_ule_i16( +; SI: %cmp = icmp ule i16 %a, %b +; SI-NEXT: %sel = select i1 %cmp, i16 %a, i16 %b +; SI-NEXT: ret i16 %sel ; VI: %[[A_32_0:[0-9]+]] = zext i16 %a to i32 -; VI: %[[B_32_0:[0-9]+]] = zext i16 %b to i32 -; VI: %[[CMP:[0-9]+]] = icmp ule i32 %[[A_32_0]], %[[B_32_0]] -; VI: %[[A_32_1:[0-9]+]] = zext i16 %a to i32 -; VI: %[[B_32_1:[0-9]+]] = zext i16 %b to i32 -; VI: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]] -; VI: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16 -; VI: ret i16 %[[SEL_16]] +; VI-NEXT: %[[B_32_0:[0-9]+]] = zext i16 %b to i32 +; VI-NEXT: %[[CMP:[0-9]+]] = icmp ule i32 %[[A_32_0]], %[[B_32_0]] +; VI-NEXT: %[[A_32_1:[0-9]+]] = zext i16 %a to i32 +; VI-NEXT: %[[B_32_1:[0-9]+]] = zext i16 %b to i32 +; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]] +; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16 +; VI-NEXT: ret i16 %[[SEL_16]] define i16 @select_ule_i16(i16 %a, i16 %b) { %cmp = icmp ule i16 %a, %b %sel = select i1 %cmp, i16 %a, i16 %b ret i16 %sel } -; VI-LABEL: @select_sgt_i16( +; GCN-LABEL: @select_sgt_i16( +; SI: %cmp = icmp sgt i16 %a, %b +; SI-NEXT: %sel = select i1 %cmp, i16 %a, 
i16 %b +; SI-NEXT: ret i16 %sel ; VI: %[[A_32_0:[0-9]+]] = sext i16 %a to i32 -; VI: %[[B_32_0:[0-9]+]] = sext i16 %b to i32 -; VI: %[[CMP:[0-9]+]] = icmp sgt i32 %[[A_32_0]], %[[B_32_0]] -; VI: %[[A_32_1:[0-9]+]] = sext i16 %a to i32 -; VI: %[[B_32_1:[0-9]+]] = sext i16 %b to i32 -; VI: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]] -; VI: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16 -; VI: ret i16 %[[SEL_16]] +; VI-NEXT: %[[B_32_0:[0-9]+]] = sext i16 %b to i32 +; VI-NEXT: %[[CMP:[0-9]+]] = icmp sgt i32 %[[A_32_0]], %[[B_32_0]] +; VI-NEXT: %[[A_32_1:[0-9]+]] = sext i16 %a to i32 +; VI-NEXT: %[[B_32_1:[0-9]+]] = sext i16 %b to i32 +; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]] +; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16 +; VI-NEXT: ret i16 %[[SEL_16]] define i16 @select_sgt_i16(i16 %a, i16 %b) { %cmp = icmp sgt i16 %a, %b %sel = select i1 %cmp, i16 %a, i16 %b ret i16 %sel } -; VI-LABEL: @select_sge_i16( +; GCN-LABEL: @select_sge_i16( +; SI: %cmp = icmp sge i16 %a, %b +; SI-NEXT: %sel = select i1 %cmp, i16 %a, i16 %b +; SI-NEXT: ret i16 %sel ; VI: %[[A_32_0:[0-9]+]] = sext i16 %a to i32 -; VI: %[[B_32_0:[0-9]+]] = sext i16 %b to i32 -; VI: %[[CMP:[0-9]+]] = icmp sge i32 %[[A_32_0]], %[[B_32_0]] -; VI: %[[A_32_1:[0-9]+]] = sext i16 %a to i32 -; VI: %[[B_32_1:[0-9]+]] = sext i16 %b to i32 -; VI: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]] -; VI: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16 -; VI: ret i16 %[[SEL_16]] +; VI-NEXT: %[[B_32_0:[0-9]+]] = sext i16 %b to i32 +; VI-NEXT: %[[CMP:[0-9]+]] = icmp sge i32 %[[A_32_0]], %[[B_32_0]] +; VI-NEXT: %[[A_32_1:[0-9]+]] = sext i16 %a to i32 +; VI-NEXT: %[[B_32_1:[0-9]+]] = sext i16 %b to i32 +; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]] +; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16 +; VI-NEXT: ret i16 %[[SEL_16]] define i16 @select_sge_i16(i16 %a, 
i16 %b) { %cmp = icmp sge i16 %a, %b %sel = select i1 %cmp, i16 %a, i16 %b ret i16 %sel } -; VI-LABEL: @select_slt_i16( +; GCN-LABEL: @select_slt_i16( +; SI: %cmp = icmp slt i16 %a, %b +; SI-NEXT: %sel = select i1 %cmp, i16 %a, i16 %b +; SI-NEXT: ret i16 %sel ; VI: %[[A_32_0:[0-9]+]] = sext i16 %a to i32 -; VI: %[[B_32_0:[0-9]+]] = sext i16 %b to i32 -; VI: %[[CMP:[0-9]+]] = icmp slt i32 %[[A_32_0]], %[[B_32_0]] -; VI: %[[A_32_1:[0-9]+]] = sext i16 %a to i32 -; VI: %[[B_32_1:[0-9]+]] = sext i16 %b to i32 -; VI: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]] -; VI: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16 -; VI: ret i16 %[[SEL_16]] +; VI-NEXT: %[[B_32_0:[0-9]+]] = sext i16 %b to i32 +; VI-NEXT: %[[CMP:[0-9]+]] = icmp slt i32 %[[A_32_0]], %[[B_32_0]] +; VI-NEXT: %[[A_32_1:[0-9]+]] = sext i16 %a to i32 +; VI-NEXT: %[[B_32_1:[0-9]+]] = sext i16 %b to i32 +; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]] +; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16 +; VI-NEXT: ret i16 %[[SEL_16]] define i16 @select_slt_i16(i16 %a, i16 %b) { %cmp = icmp slt i16 %a, %b %sel = select i1 %cmp, i16 %a, i16 %b ret i16 %sel } -; VI-LABEL: @select_sle_i16( +; GCN-LABEL: @select_sle_i16( +; SI: %cmp = icmp sle i16 %a, %b +; SI-NEXT: %sel = select i1 %cmp, i16 %a, i16 %b +; SI-NEXT: ret i16 %sel ; VI: %[[A_32_0:[0-9]+]] = sext i16 %a to i32 -; VI: %[[B_32_0:[0-9]+]] = sext i16 %b to i32 -; VI: %[[CMP:[0-9]+]] = icmp sle i32 %[[A_32_0]], %[[B_32_0]] -; VI: %[[A_32_1:[0-9]+]] = sext i16 %a to i32 -; VI: %[[B_32_1:[0-9]+]] = sext i16 %b to i32 -; VI: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]] -; VI: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16 -; VI: ret i16 %[[SEL_16]] +; VI-NEXT: %[[B_32_0:[0-9]+]] = sext i16 %b to i32 +; VI-NEXT: %[[CMP:[0-9]+]] = icmp sle i32 %[[A_32_0]], %[[B_32_0]] +; VI-NEXT: %[[A_32_1:[0-9]+]] = sext i16 %a to i32 +; VI-NEXT: %[[B_32_1:[0-9]+]] = sext 
i16 %b to i32 +; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]] +; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16 +; VI-NEXT: ret i16 %[[SEL_16]] define i16 @select_sle_i16(i16 %a, i16 %b) { %cmp = icmp sle i16 %a, %b %sel = select i1 %cmp, i16 %a, i16 %b ret i16 %sel } -; VI-LABEL: @add_3xi16( +declare i16 @llvm.bitreverse.i16(i16) +; GCN-LABEL: @bitreverse_i16( +; SI: %brev = call i16 @llvm.bitreverse.i16(i16 %a) +; SI-NEXT: ret i16 %brev +; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 +; VI-NEXT: %[[R_32:[0-9]+]] = call i32 @llvm.bitreverse.i32(i32 %[[A_32]]) +; VI-NEXT: %[[S_32:[0-9]+]] = lshr i32 %[[R_32]], 16 +; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[S_32]] to i16 +; VI-NEXT: ret i16 %[[R_16]] +define i16 @bitreverse_i16(i16 %a) { + %brev = call i16 @llvm.bitreverse.i16(i16 %a) + ret i16 %brev +} + +; GCN-LABEL: @add_3xi16( +; SI: %r = add <3 x i16> %a, %b +; SI-NEXT: ret <3 x i16> %r ; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> -; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> -; VI: %[[R_32:[0-9]+]] = add <3 x i32> %[[A_32]], %[[B_32]] -; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> -; VI: ret <3 x i16> %[[R_16]] +; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> +; VI-NEXT: %[[R_32:[0-9]+]] = add <3 x i32> %[[A_32]], %[[B_32]] +; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> +; VI-NEXT: ret <3 x i16> %[[R_16]] define <3 x i16> @add_3xi16(<3 x i16> %a, <3 x i16> %b) { %r = add <3 x i16> %a, %b ret <3 x i16> %r } -; VI-LABEL: @add_nsw_3xi16( +; GCN-LABEL: @add_nsw_3xi16( +; SI: %r = add nsw <3 x i16> %a, %b +; SI-NEXT: ret <3 x i16> %r ; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> -; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> -; VI: %[[R_32:[0-9]+]] = add nsw <3 x i32> %[[A_32]], %[[B_32]] -; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> -; VI: ret <3 x i16> %[[R_16]] +; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x 
i16> %b to <3 x i32> +; VI-NEXT: %[[R_32:[0-9]+]] = add nsw <3 x i32> %[[A_32]], %[[B_32]] +; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> +; VI-NEXT: ret <3 x i16> %[[R_16]] define <3 x i16> @add_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) { %r = add nsw <3 x i16> %a, %b ret <3 x i16> %r } -; VI-LABEL: @add_nuw_3xi16( +; GCN-LABEL: @add_nuw_3xi16( +; SI: %r = add nuw <3 x i16> %a, %b +; SI-NEXT: ret <3 x i16> %r ; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> -; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> -; VI: %[[R_32:[0-9]+]] = add nuw <3 x i32> %[[A_32]], %[[B_32]] -; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> -; VI: ret <3 x i16> %[[R_16]] +; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> +; VI-NEXT: %[[R_32:[0-9]+]] = add nuw <3 x i32> %[[A_32]], %[[B_32]] +; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> +; VI-NEXT: ret <3 x i16> %[[R_16]] define <3 x i16> @add_nuw_3xi16(<3 x i16> %a, <3 x i16> %b) { %r = add nuw <3 x i16> %a, %b ret <3 x i16> %r } -; VI-LABEL: @add_nuw_nsw_3xi16( +; GCN-LABEL: @add_nuw_nsw_3xi16( +; SI: %r = add nuw nsw <3 x i16> %a, %b +; SI-NEXT: ret <3 x i16> %r ; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> -; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> -; VI: %[[R_32:[0-9]+]] = add nuw nsw <3 x i32> %[[A_32]], %[[B_32]] -; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> -; VI: ret <3 x i16> %[[R_16]] +; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> +; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw <3 x i32> %[[A_32]], %[[B_32]] +; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> +; VI-NEXT: ret <3 x i16> %[[R_16]] define <3 x i16> @add_nuw_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) { %r = add nuw nsw <3 x i16> %a, %b ret <3 x i16> %r } -; VI-LABEL: @sub_3xi16( +; GCN-LABEL: @sub_3xi16( +; SI: %r = sub <3 x i16> %a, %b +; SI-NEXT: ret <3 x i16> %r ; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x 
i32> -; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> -; VI: %[[R_32:[0-9]+]] = sub <3 x i32> %[[A_32]], %[[B_32]] -; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> -; VI: ret <3 x i16> %[[R_16]] +; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> +; VI-NEXT: %[[R_32:[0-9]+]] = sub <3 x i32> %[[A_32]], %[[B_32]] +; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> +; VI-NEXT: ret <3 x i16> %[[R_16]] define <3 x i16> @sub_3xi16(<3 x i16> %a, <3 x i16> %b) { %r = sub <3 x i16> %a, %b ret <3 x i16> %r } -; VI-LABEL: @sub_nsw_3xi16( +; GCN-LABEL: @sub_nsw_3xi16( +; SI: %r = sub nsw <3 x i16> %a, %b +; SI-NEXT: ret <3 x i16> %r ; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> -; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> -; VI: %[[R_32:[0-9]+]] = sub nsw <3 x i32> %[[A_32]], %[[B_32]] -; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> -; VI: ret <3 x i16> %[[R_16]] +; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> +; VI-NEXT: %[[R_32:[0-9]+]] = sub nsw <3 x i32> %[[A_32]], %[[B_32]] +; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> +; VI-NEXT: ret <3 x i16> %[[R_16]] define <3 x i16> @sub_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) { %r = sub nsw <3 x i16> %a, %b ret <3 x i16> %r } -; VI-LABEL: @sub_nuw_3xi16( +; GCN-LABEL: @sub_nuw_3xi16( +; SI: %r = sub nuw <3 x i16> %a, %b +; SI-NEXT: ret <3 x i16> %r ; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> -; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> -; VI: %[[R_32:[0-9]+]] = sub nuw <3 x i32> %[[A_32]], %[[B_32]] -; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> -; VI: ret <3 x i16> %[[R_16]] +; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> +; VI-NEXT: %[[R_32:[0-9]+]] = sub nuw <3 x i32> %[[A_32]], %[[B_32]] +; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> +; VI-NEXT: ret <3 x i16> %[[R_16]] define <3 x i16> @sub_nuw_3xi16(<3 x i16> %a, <3 x i16> 
%b) { %r = sub nuw <3 x i16> %a, %b ret <3 x i16> %r } -; VI-LABEL: @sub_nuw_nsw_3xi16( +; GCN-LABEL: @sub_nuw_nsw_3xi16( +; SI: %r = sub nuw nsw <3 x i16> %a, %b +; SI-NEXT: ret <3 x i16> %r ; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> -; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> -; VI: %[[R_32:[0-9]+]] = sub nuw nsw <3 x i32> %[[A_32]], %[[B_32]] -; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> -; VI: ret <3 x i16> %[[R_16]] +; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> +; VI-NEXT: %[[R_32:[0-9]+]] = sub nuw nsw <3 x i32> %[[A_32]], %[[B_32]] +; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> +; VI-NEXT: ret <3 x i16> %[[R_16]] define <3 x i16> @sub_nuw_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) { %r = sub nuw nsw <3 x i16> %a, %b ret <3 x i16> %r } -; VI-LABEL: @mul_3xi16( +; GCN-LABEL: @mul_3xi16( +; SI: %r = mul <3 x i16> %a, %b +; SI-NEXT: ret <3 x i16> %r ; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> -; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> -; VI: %[[R_32:[0-9]+]] = mul <3 x i32> %[[A_32]], %[[B_32]] -; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> -; VI: ret <3 x i16> %[[R_16]] +; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> +; VI-NEXT: %[[R_32:[0-9]+]] = mul <3 x i32> %[[A_32]], %[[B_32]] +; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> +; VI-NEXT: ret <3 x i16> %[[R_16]] define <3 x i16> @mul_3xi16(<3 x i16> %a, <3 x i16> %b) { %r = mul <3 x i16> %a, %b ret <3 x i16> %r } -; VI-LABEL: @mul_nsw_3xi16( +; GCN-LABEL: @mul_nsw_3xi16( +; SI: %r = mul nsw <3 x i16> %a, %b +; SI-NEXT: ret <3 x i16> %r ; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> -; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> -; VI: %[[R_32:[0-9]+]] = mul nsw <3 x i32> %[[A_32]], %[[B_32]] -; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> -; VI: ret <3 x i16> %[[R_16]] +; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b 
to <3 x i32> +; VI-NEXT: %[[R_32:[0-9]+]] = mul nsw <3 x i32> %[[A_32]], %[[B_32]] +; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> +; VI-NEXT: ret <3 x i16> %[[R_16]] define <3 x i16> @mul_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) { %r = mul nsw <3 x i16> %a, %b ret <3 x i16> %r } -; VI-LABEL: @mul_nuw_3xi16( +; GCN-LABEL: @mul_nuw_3xi16( +; SI: %r = mul nuw <3 x i16> %a, %b +; SI-NEXT: ret <3 x i16> %r ; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> -; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> -; VI: %[[R_32:[0-9]+]] = mul nuw <3 x i32> %[[A_32]], %[[B_32]] -; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> -; VI: ret <3 x i16> %[[R_16]] +; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> +; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw <3 x i32> %[[A_32]], %[[B_32]] +; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> +; VI-NEXT: ret <3 x i16> %[[R_16]] define <3 x i16> @mul_nuw_3xi16(<3 x i16> %a, <3 x i16> %b) { %r = mul nuw <3 x i16> %a, %b ret <3 x i16> %r } -; VI-LABEL: @mul_nuw_nsw_3xi16( +; GCN-LABEL: @mul_nuw_nsw_3xi16( +; SI: %r = mul nuw nsw <3 x i16> %a, %b +; SI-NEXT: ret <3 x i16> %r ; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> -; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> -; VI: %[[R_32:[0-9]+]] = mul nuw nsw <3 x i32> %[[A_32]], %[[B_32]] -; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> -; VI: ret <3 x i16> %[[R_16]] +; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> +; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw nsw <3 x i32> %[[A_32]], %[[B_32]] +; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> +; VI-NEXT: ret <3 x i16> %[[R_16]] define <3 x i16> @mul_nuw_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) { %r = mul nuw nsw <3 x i16> %a, %b ret <3 x i16> %r } -; VI-LABEL: @urem_3xi16( +; GCN-LABEL: @urem_3xi16( +; SI: %r = urem <3 x i16> %a, %b +; SI-NEXT: ret <3 x i16> %r ; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> -; 
VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> -; VI: %[[R_32:[0-9]+]] = urem <3 x i32> %[[A_32]], %[[B_32]] -; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> -; VI: ret <3 x i16> %[[R_16]] +; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> +; VI-NEXT: %[[R_32:[0-9]+]] = urem <3 x i32> %[[A_32]], %[[B_32]] +; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> +; VI-NEXT: ret <3 x i16> %[[R_16]] define <3 x i16> @urem_3xi16(<3 x i16> %a, <3 x i16> %b) { %r = urem <3 x i16> %a, %b ret <3 x i16> %r } -; VI-LABEL: @srem_3xi16( +; GCN-LABEL: @srem_3xi16( +; SI: %r = srem <3 x i16> %a, %b +; SI-NEXT: ret <3 x i16> %r ; VI: %[[A_32:[0-9]+]] = sext <3 x i16> %a to <3 x i32> -; VI: %[[B_32:[0-9]+]] = sext <3 x i16> %b to <3 x i32> -; VI: %[[R_32:[0-9]+]] = srem <3 x i32> %[[A_32]], %[[B_32]] -; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> -; VI: ret <3 x i16> %[[R_16]] +; VI-NEXT: %[[B_32:[0-9]+]] = sext <3 x i16> %b to <3 x i32> +; VI-NEXT: %[[R_32:[0-9]+]] = srem <3 x i32> %[[A_32]], %[[B_32]] +; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> +; VI-NEXT: ret <3 x i16> %[[R_16]] define <3 x i16> @srem_3xi16(<3 x i16> %a, <3 x i16> %b) { %r = srem <3 x i16> %a, %b ret <3 x i16> %r } -; VI-LABEL: @shl_3xi16( +; GCN-LABEL: @shl_3xi16( +; SI: %r = shl <3 x i16> %a, %b +; SI-NEXT: ret <3 x i16> %r ; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> -; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> -; VI: %[[R_32:[0-9]+]] = shl <3 x i32> %[[A_32]], %[[B_32]] -; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> -; VI: ret <3 x i16> %[[R_16]] +; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> +; VI-NEXT: %[[R_32:[0-9]+]] = shl <3 x i32> %[[A_32]], %[[B_32]] +; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> +; VI-NEXT: ret <3 x i16> %[[R_16]] define <3 x i16> @shl_3xi16(<3 x i16> %a, <3 x i16> %b) { %r = shl <3 x i16> %a, %b ret <3 x i16> %r } 
-; VI-LABEL: @shl_nsw_3xi16( +; GCN-LABEL: @shl_nsw_3xi16( +; SI: %r = shl nsw <3 x i16> %a, %b +; SI-NEXT: ret <3 x i16> %r ; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> -; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> -; VI: %[[R_32:[0-9]+]] = shl nsw <3 x i32> %[[A_32]], %[[B_32]] -; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> -; VI: ret <3 x i16> %[[R_16]] +; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> +; VI-NEXT: %[[R_32:[0-9]+]] = shl nsw <3 x i32> %[[A_32]], %[[B_32]] +; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> +; VI-NEXT: ret <3 x i16> %[[R_16]] define <3 x i16> @shl_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) { %r = shl nsw <3 x i16> %a, %b ret <3 x i16> %r } -; VI-LABEL: @shl_nuw_3xi16( +; GCN-LABEL: @shl_nuw_3xi16( +; SI: %r = shl nuw <3 x i16> %a, %b +; SI-NEXT: ret <3 x i16> %r ; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> -; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> -; VI: %[[R_32:[0-9]+]] = shl nuw <3 x i32> %[[A_32]], %[[B_32]] -; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> -; VI: ret <3 x i16> %[[R_16]] +; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> +; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw <3 x i32> %[[A_32]], %[[B_32]] +; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> +; VI-NEXT: ret <3 x i16> %[[R_16]] define <3 x i16> @shl_nuw_3xi16(<3 x i16> %a, <3 x i16> %b) { %r = shl nuw <3 x i16> %a, %b ret <3 x i16> %r } -; VI-LABEL: @shl_nuw_nsw_3xi16( +; GCN-LABEL: @shl_nuw_nsw_3xi16( +; SI: %r = shl nuw nsw <3 x i16> %a, %b +; SI-NEXT: ret <3 x i16> %r ; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> -; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> -; VI: %[[R_32:[0-9]+]] = shl nuw nsw <3 x i32> %[[A_32]], %[[B_32]] -; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> -; VI: ret <3 x i16> %[[R_16]] +; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> +; VI-NEXT: 
%[[R_32:[0-9]+]] = shl nuw nsw <3 x i32> %[[A_32]], %[[B_32]] +; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> +; VI-NEXT: ret <3 x i16> %[[R_16]] define <3 x i16> @shl_nuw_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) { %r = shl nuw nsw <3 x i16> %a, %b ret <3 x i16> %r } -; VI-LABEL: @lshr_3xi16( +; GCN-LABEL: @lshr_3xi16( +; SI: %r = lshr <3 x i16> %a, %b +; SI-NEXT: ret <3 x i16> %r ; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> -; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> -; VI: %[[R_32:[0-9]+]] = lshr <3 x i32> %[[A_32]], %[[B_32]] -; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> -; VI: ret <3 x i16> %[[R_16]] +; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> +; VI-NEXT: %[[R_32:[0-9]+]] = lshr <3 x i32> %[[A_32]], %[[B_32]] +; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> +; VI-NEXT: ret <3 x i16> %[[R_16]] define <3 x i16> @lshr_3xi16(<3 x i16> %a, <3 x i16> %b) { %r = lshr <3 x i16> %a, %b ret <3 x i16> %r } -; VI-LABEL: @lshr_exact_3xi16( +; GCN-LABEL: @lshr_exact_3xi16( +; SI: %r = lshr exact <3 x i16> %a, %b +; SI-NEXT: ret <3 x i16> %r ; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> -; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> -; VI: %[[R_32:[0-9]+]] = lshr exact <3 x i32> %[[A_32]], %[[B_32]] -; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> -; VI: ret <3 x i16> %[[R_16]] +; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> +; VI-NEXT: %[[R_32:[0-9]+]] = lshr exact <3 x i32> %[[A_32]], %[[B_32]] +; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> +; VI-NEXT: ret <3 x i16> %[[R_16]] define <3 x i16> @lshr_exact_3xi16(<3 x i16> %a, <3 x i16> %b) { %r = lshr exact <3 x i16> %a, %b ret <3 x i16> %r } -; VI-LABEL: @ashr_3xi16( +; GCN-LABEL: @ashr_3xi16( +; SI: %r = ashr <3 x i16> %a, %b +; SI-NEXT: ret <3 x i16> %r ; VI: %[[A_32:[0-9]+]] = sext <3 x i16> %a to <3 x i32> -; VI: %[[B_32:[0-9]+]] = sext <3 x i16> %b 
to <3 x i32> -; VI: %[[R_32:[0-9]+]] = ashr <3 x i32> %[[A_32]], %[[B_32]] -; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> -; VI: ret <3 x i16> %[[R_16]] +; VI-NEXT: %[[B_32:[0-9]+]] = sext <3 x i16> %b to <3 x i32> +; VI-NEXT: %[[R_32:[0-9]+]] = ashr <3 x i32> %[[A_32]], %[[B_32]] +; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> +; VI-NEXT: ret <3 x i16> %[[R_16]] define <3 x i16> @ashr_3xi16(<3 x i16> %a, <3 x i16> %b) { %r = ashr <3 x i16> %a, %b ret <3 x i16> %r } -; VI-LABEL: @ashr_exact_3xi16( +; GCN-LABEL: @ashr_exact_3xi16( +; SI: %r = ashr exact <3 x i16> %a, %b +; SI-NEXT: ret <3 x i16> %r ; VI: %[[A_32:[0-9]+]] = sext <3 x i16> %a to <3 x i32> -; VI: %[[B_32:[0-9]+]] = sext <3 x i16> %b to <3 x i32> -; VI: %[[R_32:[0-9]+]] = ashr exact <3 x i32> %[[A_32]], %[[B_32]] -; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> -; VI: ret <3 x i16> %[[R_16]] +; VI-NEXT: %[[B_32:[0-9]+]] = sext <3 x i16> %b to <3 x i32> +; VI-NEXT: %[[R_32:[0-9]+]] = ashr exact <3 x i32> %[[A_32]], %[[B_32]] +; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> +; VI-NEXT: ret <3 x i16> %[[R_16]] define <3 x i16> @ashr_exact_3xi16(<3 x i16> %a, <3 x i16> %b) { %r = ashr exact <3 x i16> %a, %b ret <3 x i16> %r } -; VI-LABEL: @and_3xi16( +; GCN-LABEL: @and_3xi16( +; SI: %r = and <3 x i16> %a, %b +; SI-NEXT: ret <3 x i16> %r ; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> -; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> -; VI: %[[R_32:[0-9]+]] = and <3 x i32> %[[A_32]], %[[B_32]] -; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> -; VI: ret <3 x i16> %[[R_16]] +; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> +; VI-NEXT: %[[R_32:[0-9]+]] = and <3 x i32> %[[A_32]], %[[B_32]] +; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> +; VI-NEXT: ret <3 x i16> %[[R_16]] define <3 x i16> @and_3xi16(<3 x i16> %a, <3 x i16> %b) { %r = and <3 x i16> %a, %b ret <3 x i16> %r } 
-; VI-LABEL: @or_3xi16( +; GCN-LABEL: @or_3xi16( +; SI: %r = or <3 x i16> %a, %b +; SI-NEXT: ret <3 x i16> %r ; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> -; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> -; VI: %[[R_32:[0-9]+]] = or <3 x i32> %[[A_32]], %[[B_32]] -; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> -; VI: ret <3 x i16> %[[R_16]] +; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> +; VI-NEXT: %[[R_32:[0-9]+]] = or <3 x i32> %[[A_32]], %[[B_32]] +; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> +; VI-NEXT: ret <3 x i16> %[[R_16]] define <3 x i16> @or_3xi16(<3 x i16> %a, <3 x i16> %b) { %r = or <3 x i16> %a, %b ret <3 x i16> %r } -; VI-LABEL: @xor_3xi16( +; GCN-LABEL: @xor_3xi16( +; SI: %r = xor <3 x i16> %a, %b +; SI-NEXT: ret <3 x i16> %r ; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> -; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> -; VI: %[[R_32:[0-9]+]] = xor <3 x i32> %[[A_32]], %[[B_32]] -; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> -; VI: ret <3 x i16> %[[R_16]] +; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> +; VI-NEXT: %[[R_32:[0-9]+]] = xor <3 x i32> %[[A_32]], %[[B_32]] +; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> +; VI-NEXT: ret <3 x i16> %[[R_16]] define <3 x i16> @xor_3xi16(<3 x i16> %a, <3 x i16> %b) { %r = xor <3 x i16> %a, %b ret <3 x i16> %r } -; VI-LABEL: @select_eq_3xi16( +; GCN-LABEL: @select_eq_3xi16( +; SI: %cmp = icmp eq <3 x i16> %a, %b +; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b +; SI-NEXT: ret <3 x i16> %sel ; VI: %[[A_32_0:[0-9]+]] = zext <3 x i16> %a to <3 x i32> -; VI: %[[B_32_0:[0-9]+]] = zext <3 x i16> %b to <3 x i32> -; VI: %[[CMP:[0-9]+]] = icmp eq <3 x i32> %[[A_32_0]], %[[B_32_0]] -; VI: %[[A_32_1:[0-9]+]] = zext <3 x i16> %a to <3 x i32> -; VI: %[[B_32_1:[0-9]+]] = zext <3 x i16> %b to <3 x i32> -; VI: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x 
i32> %[[A_32_1]], <3 x i32> %[[B_32_1]] -; VI: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16> -; VI: ret <3 x i16> %[[SEL_16]] +; VI-NEXT: %[[B_32_0:[0-9]+]] = zext <3 x i16> %b to <3 x i32> +; VI-NEXT: %[[CMP:[0-9]+]] = icmp eq <3 x i32> %[[A_32_0]], %[[B_32_0]] +; VI-NEXT: %[[A_32_1:[0-9]+]] = zext <3 x i16> %a to <3 x i32> +; VI-NEXT: %[[B_32_1:[0-9]+]] = zext <3 x i16> %b to <3 x i32> +; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]] +; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16> +; VI-NEXT: ret <3 x i16> %[[SEL_16]] define <3 x i16> @select_eq_3xi16(<3 x i16> %a, <3 x i16> %b) { %cmp = icmp eq <3 x i16> %a, %b %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b ret <3 x i16> %sel } -; VI-LABEL: @select_ne_3xi16( +; GCN-LABEL: @select_ne_3xi16( +; SI: %cmp = icmp ne <3 x i16> %a, %b +; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b +; SI-NEXT: ret <3 x i16> %sel ; VI: %[[A_32_0:[0-9]+]] = zext <3 x i16> %a to <3 x i32> -; VI: %[[B_32_0:[0-9]+]] = zext <3 x i16> %b to <3 x i32> -; VI: %[[CMP:[0-9]+]] = icmp ne <3 x i32> %[[A_32_0]], %[[B_32_0]] -; VI: %[[A_32_1:[0-9]+]] = zext <3 x i16> %a to <3 x i32> -; VI: %[[B_32_1:[0-9]+]] = zext <3 x i16> %b to <3 x i32> -; VI: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]] -; VI: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16> -; VI: ret <3 x i16> %[[SEL_16]] +; VI-NEXT: %[[B_32_0:[0-9]+]] = zext <3 x i16> %b to <3 x i32> +; VI-NEXT: %[[CMP:[0-9]+]] = icmp ne <3 x i32> %[[A_32_0]], %[[B_32_0]] +; VI-NEXT: %[[A_32_1:[0-9]+]] = zext <3 x i16> %a to <3 x i32> +; VI-NEXT: %[[B_32_1:[0-9]+]] = zext <3 x i16> %b to <3 x i32> +; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]] +; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16> +; VI-NEXT: ret <3 x i16> %[[SEL_16]] define <3 x i16> 
@select_ne_3xi16(<3 x i16> %a, <3 x i16> %b) { %cmp = icmp ne <3 x i16> %a, %b %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b ret <3 x i16> %sel } -; VI-LABEL: @select_ugt_3xi16( +; GCN-LABEL: @select_ugt_3xi16( +; SI: %cmp = icmp ugt <3 x i16> %a, %b +; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b +; SI-NEXT: ret <3 x i16> %sel ; VI: %[[A_32_0:[0-9]+]] = zext <3 x i16> %a to <3 x i32> -; VI: %[[B_32_0:[0-9]+]] = zext <3 x i16> %b to <3 x i32> -; VI: %[[CMP:[0-9]+]] = icmp ugt <3 x i32> %[[A_32_0]], %[[B_32_0]] -; VI: %[[A_32_1:[0-9]+]] = zext <3 x i16> %a to <3 x i32> -; VI: %[[B_32_1:[0-9]+]] = zext <3 x i16> %b to <3 x i32> -; VI: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]] -; VI: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16> -; VI: ret <3 x i16> %[[SEL_16]] +; VI-NEXT: %[[B_32_0:[0-9]+]] = zext <3 x i16> %b to <3 x i32> +; VI-NEXT: %[[CMP:[0-9]+]] = icmp ugt <3 x i32> %[[A_32_0]], %[[B_32_0]] +; VI-NEXT: %[[A_32_1:[0-9]+]] = zext <3 x i16> %a to <3 x i32> +; VI-NEXT: %[[B_32_1:[0-9]+]] = zext <3 x i16> %b to <3 x i32> +; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]] +; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16> +; VI-NEXT: ret <3 x i16> %[[SEL_16]] define <3 x i16> @select_ugt_3xi16(<3 x i16> %a, <3 x i16> %b) { %cmp = icmp ugt <3 x i16> %a, %b %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b ret <3 x i16> %sel } -; VI-LABEL: @select_uge_3xi16( +; GCN-LABEL: @select_uge_3xi16( +; SI: %cmp = icmp uge <3 x i16> %a, %b +; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b +; SI-NEXT: ret <3 x i16> %sel ; VI: %[[A_32_0:[0-9]+]] = zext <3 x i16> %a to <3 x i32> -; VI: %[[B_32_0:[0-9]+]] = zext <3 x i16> %b to <3 x i32> -; VI: %[[CMP:[0-9]+]] = icmp uge <3 x i32> %[[A_32_0]], %[[B_32_0]] -; VI: %[[A_32_1:[0-9]+]] = zext <3 x i16> %a to <3 x i32> -; VI: %[[B_32_1:[0-9]+]] = 
zext <3 x i16> %b to <3 x i32> -; VI: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]] -; VI: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16> -; VI: ret <3 x i16> %[[SEL_16]] +; VI-NEXT: %[[B_32_0:[0-9]+]] = zext <3 x i16> %b to <3 x i32> +; VI-NEXT: %[[CMP:[0-9]+]] = icmp uge <3 x i32> %[[A_32_0]], %[[B_32_0]] +; VI-NEXT: %[[A_32_1:[0-9]+]] = zext <3 x i16> %a to <3 x i32> +; VI-NEXT: %[[B_32_1:[0-9]+]] = zext <3 x i16> %b to <3 x i32> +; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]] +; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16> +; VI-NEXT: ret <3 x i16> %[[SEL_16]] define <3 x i16> @select_uge_3xi16(<3 x i16> %a, <3 x i16> %b) { %cmp = icmp uge <3 x i16> %a, %b %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b ret <3 x i16> %sel } -; VI-LABEL: @select_ult_3xi16( +; GCN-LABEL: @select_ult_3xi16( +; SI: %cmp = icmp ult <3 x i16> %a, %b +; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b +; SI-NEXT: ret <3 x i16> %sel ; VI: %[[A_32_0:[0-9]+]] = zext <3 x i16> %a to <3 x i32> -; VI: %[[B_32_0:[0-9]+]] = zext <3 x i16> %b to <3 x i32> -; VI: %[[CMP:[0-9]+]] = icmp ult <3 x i32> %[[A_32_0]], %[[B_32_0]] -; VI: %[[A_32_1:[0-9]+]] = zext <3 x i16> %a to <3 x i32> -; VI: %[[B_32_1:[0-9]+]] = zext <3 x i16> %b to <3 x i32> -; VI: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]] -; VI: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16> -; VI: ret <3 x i16> %[[SEL_16]] +; VI-NEXT: %[[B_32_0:[0-9]+]] = zext <3 x i16> %b to <3 x i32> +; VI-NEXT: %[[CMP:[0-9]+]] = icmp ult <3 x i32> %[[A_32_0]], %[[B_32_0]] +; VI-NEXT: %[[A_32_1:[0-9]+]] = zext <3 x i16> %a to <3 x i32> +; VI-NEXT: %[[B_32_1:[0-9]+]] = zext <3 x i16> %b to <3 x i32> +; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]] +; VI-NEXT: %[[SEL_16:[0-9]+]] 
= trunc <3 x i32> %[[SEL_32]] to <3 x i16> +; VI-NEXT: ret <3 x i16> %[[SEL_16]] define <3 x i16> @select_ult_3xi16(<3 x i16> %a, <3 x i16> %b) { %cmp = icmp ult <3 x i16> %a, %b %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b ret <3 x i16> %sel } -; VI-LABEL: @select_ule_3xi16( +; GCN-LABEL: @select_ule_3xi16( +; SI: %cmp = icmp ule <3 x i16> %a, %b +; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b +; SI-NEXT: ret <3 x i16> %sel ; VI: %[[A_32_0:[0-9]+]] = zext <3 x i16> %a to <3 x i32> -; VI: %[[B_32_0:[0-9]+]] = zext <3 x i16> %b to <3 x i32> -; VI: %[[CMP:[0-9]+]] = icmp ule <3 x i32> %[[A_32_0]], %[[B_32_0]] -; VI: %[[A_32_1:[0-9]+]] = zext <3 x i16> %a to <3 x i32> -; VI: %[[B_32_1:[0-9]+]] = zext <3 x i16> %b to <3 x i32> -; VI: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]] -; VI: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16> -; VI: ret <3 x i16> %[[SEL_16]] +; VI-NEXT: %[[B_32_0:[0-9]+]] = zext <3 x i16> %b to <3 x i32> +; VI-NEXT: %[[CMP:[0-9]+]] = icmp ule <3 x i32> %[[A_32_0]], %[[B_32_0]] +; VI-NEXT: %[[A_32_1:[0-9]+]] = zext <3 x i16> %a to <3 x i32> +; VI-NEXT: %[[B_32_1:[0-9]+]] = zext <3 x i16> %b to <3 x i32> +; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]] +; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16> +; VI-NEXT: ret <3 x i16> %[[SEL_16]] define <3 x i16> @select_ule_3xi16(<3 x i16> %a, <3 x i16> %b) { %cmp = icmp ule <3 x i16> %a, %b %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b ret <3 x i16> %sel } -; VI-LABEL: @select_sgt_3xi16( +; GCN-LABEL: @select_sgt_3xi16( +; SI: %cmp = icmp sgt <3 x i16> %a, %b +; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b +; SI-NEXT: ret <3 x i16> %sel ; VI: %[[A_32_0:[0-9]+]] = sext <3 x i16> %a to <3 x i32> -; VI: %[[B_32_0:[0-9]+]] = sext <3 x i16> %b to <3 x i32> -; VI: %[[CMP:[0-9]+]] = icmp sgt <3 x i32> %[[A_32_0]], 
%[[B_32_0]] -; VI: %[[A_32_1:[0-9]+]] = sext <3 x i16> %a to <3 x i32> -; VI: %[[B_32_1:[0-9]+]] = sext <3 x i16> %b to <3 x i32> -; VI: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]] -; VI: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16> -; VI: ret <3 x i16> %[[SEL_16]] +; VI-NEXT: %[[B_32_0:[0-9]+]] = sext <3 x i16> %b to <3 x i32> +; VI-NEXT: %[[CMP:[0-9]+]] = icmp sgt <3 x i32> %[[A_32_0]], %[[B_32_0]] +; VI-NEXT: %[[A_32_1:[0-9]+]] = sext <3 x i16> %a to <3 x i32> +; VI-NEXT: %[[B_32_1:[0-9]+]] = sext <3 x i16> %b to <3 x i32> +; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]] +; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16> +; VI-NEXT: ret <3 x i16> %[[SEL_16]] define <3 x i16> @select_sgt_3xi16(<3 x i16> %a, <3 x i16> %b) { %cmp = icmp sgt <3 x i16> %a, %b %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b ret <3 x i16> %sel } -; VI-LABEL: @select_sge_3xi16( +; GCN-LABEL: @select_sge_3xi16( +; SI: %cmp = icmp sge <3 x i16> %a, %b +; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b +; SI-NEXT: ret <3 x i16> %sel ; VI: %[[A_32_0:[0-9]+]] = sext <3 x i16> %a to <3 x i32> -; VI: %[[B_32_0:[0-9]+]] = sext <3 x i16> %b to <3 x i32> -; VI: %[[CMP:[0-9]+]] = icmp sge <3 x i32> %[[A_32_0]], %[[B_32_0]] -; VI: %[[A_32_1:[0-9]+]] = sext <3 x i16> %a to <3 x i32> -; VI: %[[B_32_1:[0-9]+]] = sext <3 x i16> %b to <3 x i32> -; VI: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]] -; VI: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16> -; VI: ret <3 x i16> %[[SEL_16]] +; VI-NEXT: %[[B_32_0:[0-9]+]] = sext <3 x i16> %b to <3 x i32> +; VI-NEXT: %[[CMP:[0-9]+]] = icmp sge <3 x i32> %[[A_32_0]], %[[B_32_0]] +; VI-NEXT: %[[A_32_1:[0-9]+]] = sext <3 x i16> %a to <3 x i32> +; VI-NEXT: %[[B_32_1:[0-9]+]] = sext <3 x i16> %b to <3 x i32> +; VI-NEXT: %[[SEL_32:[0-9]+]] = 
select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]] +; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16> +; VI-NEXT: ret <3 x i16> %[[SEL_16]] define <3 x i16> @select_sge_3xi16(<3 x i16> %a, <3 x i16> %b) { %cmp = icmp sge <3 x i16> %a, %b %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b ret <3 x i16> %sel } -; VI-LABEL: @select_slt_3xi16( +; GCN-LABEL: @select_slt_3xi16( +; SI: %cmp = icmp slt <3 x i16> %a, %b +; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b +; SI-NEXT: ret <3 x i16> %sel ; VI: %[[A_32_0:[0-9]+]] = sext <3 x i16> %a to <3 x i32> -; VI: %[[B_32_0:[0-9]+]] = sext <3 x i16> %b to <3 x i32> -; VI: %[[CMP:[0-9]+]] = icmp slt <3 x i32> %[[A_32_0]], %[[B_32_0]] -; VI: %[[A_32_1:[0-9]+]] = sext <3 x i16> %a to <3 x i32> -; VI: %[[B_32_1:[0-9]+]] = sext <3 x i16> %b to <3 x i32> -; VI: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]] -; VI: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16> -; VI: ret <3 x i16> %[[SEL_16]] +; VI-NEXT: %[[B_32_0:[0-9]+]] = sext <3 x i16> %b to <3 x i32> +; VI-NEXT: %[[CMP:[0-9]+]] = icmp slt <3 x i32> %[[A_32_0]], %[[B_32_0]] +; VI-NEXT: %[[A_32_1:[0-9]+]] = sext <3 x i16> %a to <3 x i32> +; VI-NEXT: %[[B_32_1:[0-9]+]] = sext <3 x i16> %b to <3 x i32> +; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]] +; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16> +; VI-NEXT: ret <3 x i16> %[[SEL_16]] define <3 x i16> @select_slt_3xi16(<3 x i16> %a, <3 x i16> %b) { %cmp = icmp slt <3 x i16> %a, %b %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b ret <3 x i16> %sel } -; VI-LABEL: @select_sle_3xi16( +; GCN-LABEL: @select_sle_3xi16( +; SI: %cmp = icmp sle <3 x i16> %a, %b +; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b +; SI-NEXT: ret <3 x i16> %sel ; VI: %[[A_32_0:[0-9]+]] = sext <3 x i16> %a to <3 x i32> -; VI: 
%[[B_32_0:[0-9]+]] = sext <3 x i16> %b to <3 x i32> -; VI: %[[CMP:[0-9]+]] = icmp sle <3 x i32> %[[A_32_0]], %[[B_32_0]] -; VI: %[[A_32_1:[0-9]+]] = sext <3 x i16> %a to <3 x i32> -; VI: %[[B_32_1:[0-9]+]] = sext <3 x i16> %b to <3 x i32> -; VI: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]] -; VI: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16> -; VI: ret <3 x i16> %[[SEL_16]] +; VI-NEXT: %[[B_32_0:[0-9]+]] = sext <3 x i16> %b to <3 x i32> +; VI-NEXT: %[[CMP:[0-9]+]] = icmp sle <3 x i32> %[[A_32_0]], %[[B_32_0]] +; VI-NEXT: %[[A_32_1:[0-9]+]] = sext <3 x i16> %a to <3 x i32> +; VI-NEXT: %[[B_32_1:[0-9]+]] = sext <3 x i16> %b to <3 x i32> +; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]] +; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16> +; VI-NEXT: ret <3 x i16> %[[SEL_16]] define <3 x i16> @select_sle_3xi16(<3 x i16> %a, <3 x i16> %b) { %cmp = icmp sle <3 x i16> %a, %b %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b ret <3 x i16> %sel } + +declare <3 x i16> @llvm.bitreverse.v3i16(<3 x i16>) +; GCN-LABEL: @bitreverse_3xi16( +; SI: %brev = call <3 x i16> @llvm.bitreverse.v3i16(<3 x i16> %a) +; SI-NEXT: ret <3 x i16> %brev +; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> +; VI-NEXT: %[[R_32:[0-9]+]] = call <3 x i32> @llvm.bitreverse.v3i32(<3 x i32> %[[A_32]]) +; VI-NEXT: %[[S_32:[0-9]+]] = lshr <3 x i32> %[[R_32]], <i32 16, i32 16, i32 16> +; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[S_32]] to <3 x i16> +; VI-NEXT: ret <3 x i16> %[[R_16]] +define <3 x i16> @bitreverse_3xi16(<3 x i16> %a) { + %brev = call <3 x i16> @llvm.bitreverse.v3i16(<3 x i16> %a) + ret <3 x i16> %brev +}