mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 03:02:36 +01:00
AMDGPU: Don't create weird sized integers
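There is no reason to introduce a new, unnaturally sized integer value here (a trunc to iN followed by a sext back to i32), since it may produce worse code after legalization. Sign-extend in register with a shl/ashr pair instead. This also avoids a regression in a future patch.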
This commit is contained in:
parent
ddf5bbdb8a
commit
87ac35b140
@ -725,10 +725,10 @@ Value* AMDGPUCodeGenPrepare::expandDivRem24(IRBuilder<> &Builder,
|
||||
Res = Builder.CreateSub(Num, Rem);
|
||||
}
|
||||
|
||||
// Truncate to number of bits this divide really is.
|
||||
// Extend in register from the number of bits this divide really is.
|
||||
if (IsSigned) {
|
||||
Res = Builder.CreateTrunc(Res, Builder.getIntNTy(DivBits));
|
||||
Res = Builder.CreateSExt(Res, Ty);
|
||||
Res = Builder.CreateShl(Res, 32 - DivBits);
|
||||
Res = Builder.CreateAShr(Res, 32 - DivBits);
|
||||
} else {
|
||||
ConstantInt *TruncMask = Builder.getInt32((UINT64_C(1) << DivBits) - 1);
|
||||
Res = Builder.CreateAnd(Res, TruncMask);
|
||||
|
@ -294,8 +294,8 @@ define amdgpu_kernel void @sdiv_i16(i16 addrspace(1)* %out, i16 %x, i16 %y) {
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = fcmp fast oge float [[TMP14]], [[TMP15]]
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP5]], i32 0
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[TMP13]], [[TMP17]]
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = trunc i32 [[TMP18]] to i16
|
||||
; CHECK-NEXT: [[TMP20:%.*]] = sext i16 [[TMP19]] to i32
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = shl i32 [[TMP18]], 16
|
||||
; CHECK-NEXT: [[TMP20:%.*]] = ashr i32 [[TMP19]], 16
|
||||
; CHECK-NEXT: [[TMP21:%.*]] = trunc i32 [[TMP20]] to i16
|
||||
; CHECK-NEXT: store i16 [[TMP21]], i16 addrspace(1)* [[OUT:%.*]]
|
||||
; CHECK-NEXT: ret void
|
||||
@ -327,8 +327,8 @@ define amdgpu_kernel void @srem_i16(i16 addrspace(1)* %out, i16 %x, i16 %y) {
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[TMP13]], [[TMP17]]
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = mul i32 [[TMP18]], [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP20:%.*]] = sub i32 [[TMP1]], [[TMP19]]
|
||||
; CHECK-NEXT: [[TMP21:%.*]] = trunc i32 [[TMP20]] to i16
|
||||
; CHECK-NEXT: [[TMP22:%.*]] = sext i16 [[TMP21]] to i32
|
||||
; CHECK-NEXT: [[TMP21:%.*]] = shl i32 [[TMP20]], 16
|
||||
; CHECK-NEXT: [[TMP22:%.*]] = ashr i32 [[TMP21]], 16
|
||||
; CHECK-NEXT: [[TMP23:%.*]] = trunc i32 [[TMP22]] to i16
|
||||
; CHECK-NEXT: store i16 [[TMP23]], i16 addrspace(1)* [[OUT:%.*]]
|
||||
; CHECK-NEXT: ret void
|
||||
@ -414,8 +414,8 @@ define amdgpu_kernel void @sdiv_i8(i8 addrspace(1)* %out, i8 %x, i8 %y) {
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = fcmp fast oge float [[TMP14]], [[TMP15]]
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP5]], i32 0
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[TMP13]], [[TMP17]]
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = trunc i32 [[TMP18]] to i8
|
||||
; CHECK-NEXT: [[TMP20:%.*]] = sext i8 [[TMP19]] to i32
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = shl i32 [[TMP18]], 24
|
||||
; CHECK-NEXT: [[TMP20:%.*]] = ashr i32 [[TMP19]], 24
|
||||
; CHECK-NEXT: [[TMP21:%.*]] = trunc i32 [[TMP20]] to i8
|
||||
; CHECK-NEXT: store i8 [[TMP21]], i8 addrspace(1)* [[OUT:%.*]]
|
||||
; CHECK-NEXT: ret void
|
||||
@ -447,8 +447,8 @@ define amdgpu_kernel void @srem_i8(i8 addrspace(1)* %out, i8 %x, i8 %y) {
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[TMP13]], [[TMP17]]
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = mul i32 [[TMP18]], [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP20:%.*]] = sub i32 [[TMP1]], [[TMP19]]
|
||||
; CHECK-NEXT: [[TMP21:%.*]] = trunc i32 [[TMP20]] to i8
|
||||
; CHECK-NEXT: [[TMP22:%.*]] = sext i8 [[TMP21]] to i32
|
||||
; CHECK-NEXT: [[TMP21:%.*]] = shl i32 [[TMP20]], 24
|
||||
; CHECK-NEXT: [[TMP22:%.*]] = ashr i32 [[TMP21]], 24
|
||||
; CHECK-NEXT: [[TMP23:%.*]] = trunc i32 [[TMP22]] to i8
|
||||
; CHECK-NEXT: store i8 [[TMP23]], i8 addrspace(1)* [[OUT:%.*]]
|
||||
; CHECK-NEXT: ret void
|
||||
@ -1464,8 +1464,8 @@ define amdgpu_kernel void @sdiv_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %x
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = fcmp fast oge float [[TMP16]], [[TMP17]]
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = select i1 [[TMP18]], i32 [[TMP7]], i32 0
|
||||
; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[TMP15]], [[TMP19]]
|
||||
; CHECK-NEXT: [[TMP21:%.*]] = trunc i32 [[TMP20]] to i16
|
||||
; CHECK-NEXT: [[TMP22:%.*]] = sext i16 [[TMP21]] to i32
|
||||
; CHECK-NEXT: [[TMP21:%.*]] = shl i32 [[TMP20]], 16
|
||||
; CHECK-NEXT: [[TMP22:%.*]] = ashr i32 [[TMP21]], 16
|
||||
; CHECK-NEXT: [[TMP23:%.*]] = trunc i32 [[TMP22]] to i16
|
||||
; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x i16> undef, i16 [[TMP23]], i64 0
|
||||
; CHECK-NEXT: [[TMP25:%.*]] = extractelement <4 x i16> [[X]], i64 1
|
||||
@ -1488,8 +1488,8 @@ define amdgpu_kernel void @sdiv_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %x
|
||||
; CHECK-NEXT: [[TMP42:%.*]] = fcmp fast oge float [[TMP40]], [[TMP41]]
|
||||
; CHECK-NEXT: [[TMP43:%.*]] = select i1 [[TMP42]], i32 [[TMP31]], i32 0
|
||||
; CHECK-NEXT: [[TMP44:%.*]] = add i32 [[TMP39]], [[TMP43]]
|
||||
; CHECK-NEXT: [[TMP45:%.*]] = trunc i32 [[TMP44]] to i16
|
||||
; CHECK-NEXT: [[TMP46:%.*]] = sext i16 [[TMP45]] to i32
|
||||
; CHECK-NEXT: [[TMP45:%.*]] = shl i32 [[TMP44]], 16
|
||||
; CHECK-NEXT: [[TMP46:%.*]] = ashr i32 [[TMP45]], 16
|
||||
; CHECK-NEXT: [[TMP47:%.*]] = trunc i32 [[TMP46]] to i16
|
||||
; CHECK-NEXT: [[TMP48:%.*]] = insertelement <4 x i16> [[TMP24]], i16 [[TMP47]], i64 1
|
||||
; CHECK-NEXT: [[TMP49:%.*]] = extractelement <4 x i16> [[X]], i64 2
|
||||
@ -1512,8 +1512,8 @@ define amdgpu_kernel void @sdiv_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %x
|
||||
; CHECK-NEXT: [[TMP66:%.*]] = fcmp fast oge float [[TMP64]], [[TMP65]]
|
||||
; CHECK-NEXT: [[TMP67:%.*]] = select i1 [[TMP66]], i32 [[TMP55]], i32 0
|
||||
; CHECK-NEXT: [[TMP68:%.*]] = add i32 [[TMP63]], [[TMP67]]
|
||||
; CHECK-NEXT: [[TMP69:%.*]] = trunc i32 [[TMP68]] to i16
|
||||
; CHECK-NEXT: [[TMP70:%.*]] = sext i16 [[TMP69]] to i32
|
||||
; CHECK-NEXT: [[TMP69:%.*]] = shl i32 [[TMP68]], 16
|
||||
; CHECK-NEXT: [[TMP70:%.*]] = ashr i32 [[TMP69]], 16
|
||||
; CHECK-NEXT: [[TMP71:%.*]] = trunc i32 [[TMP70]] to i16
|
||||
; CHECK-NEXT: [[TMP72:%.*]] = insertelement <4 x i16> [[TMP48]], i16 [[TMP71]], i64 2
|
||||
; CHECK-NEXT: [[TMP73:%.*]] = extractelement <4 x i16> [[X]], i64 3
|
||||
@ -1536,8 +1536,8 @@ define amdgpu_kernel void @sdiv_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %x
|
||||
; CHECK-NEXT: [[TMP90:%.*]] = fcmp fast oge float [[TMP88]], [[TMP89]]
|
||||
; CHECK-NEXT: [[TMP91:%.*]] = select i1 [[TMP90]], i32 [[TMP79]], i32 0
|
||||
; CHECK-NEXT: [[TMP92:%.*]] = add i32 [[TMP87]], [[TMP91]]
|
||||
; CHECK-NEXT: [[TMP93:%.*]] = trunc i32 [[TMP92]] to i16
|
||||
; CHECK-NEXT: [[TMP94:%.*]] = sext i16 [[TMP93]] to i32
|
||||
; CHECK-NEXT: [[TMP93:%.*]] = shl i32 [[TMP92]], 16
|
||||
; CHECK-NEXT: [[TMP94:%.*]] = ashr i32 [[TMP93]], 16
|
||||
; CHECK-NEXT: [[TMP95:%.*]] = trunc i32 [[TMP94]] to i16
|
||||
; CHECK-NEXT: [[TMP96:%.*]] = insertelement <4 x i16> [[TMP72]], i16 [[TMP95]], i64 3
|
||||
; CHECK-NEXT: store <4 x i16> [[TMP96]], <4 x i16> addrspace(1)* [[OUT:%.*]]
|
||||
@ -1572,8 +1572,8 @@ define amdgpu_kernel void @srem_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %x
|
||||
; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[TMP15]], [[TMP19]]
|
||||
; CHECK-NEXT: [[TMP21:%.*]] = mul i32 [[TMP20]], [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP22:%.*]] = sub i32 [[TMP3]], [[TMP21]]
|
||||
; CHECK-NEXT: [[TMP23:%.*]] = trunc i32 [[TMP22]] to i16
|
||||
; CHECK-NEXT: [[TMP24:%.*]] = sext i16 [[TMP23]] to i32
|
||||
; CHECK-NEXT: [[TMP23:%.*]] = shl i32 [[TMP22]], 16
|
||||
; CHECK-NEXT: [[TMP24:%.*]] = ashr i32 [[TMP23]], 16
|
||||
; CHECK-NEXT: [[TMP25:%.*]] = trunc i32 [[TMP24]] to i16
|
||||
; CHECK-NEXT: [[TMP26:%.*]] = insertelement <4 x i16> undef, i16 [[TMP25]], i64 0
|
||||
; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x i16> [[X]], i64 1
|
||||
@ -1598,8 +1598,8 @@ define amdgpu_kernel void @srem_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %x
|
||||
; CHECK-NEXT: [[TMP46:%.*]] = add i32 [[TMP41]], [[TMP45]]
|
||||
; CHECK-NEXT: [[TMP47:%.*]] = mul i32 [[TMP46]], [[TMP30]]
|
||||
; CHECK-NEXT: [[TMP48:%.*]] = sub i32 [[TMP29]], [[TMP47]]
|
||||
; CHECK-NEXT: [[TMP49:%.*]] = trunc i32 [[TMP48]] to i16
|
||||
; CHECK-NEXT: [[TMP50:%.*]] = sext i16 [[TMP49]] to i32
|
||||
; CHECK-NEXT: [[TMP49:%.*]] = shl i32 [[TMP48]], 16
|
||||
; CHECK-NEXT: [[TMP50:%.*]] = ashr i32 [[TMP49]], 16
|
||||
; CHECK-NEXT: [[TMP51:%.*]] = trunc i32 [[TMP50]] to i16
|
||||
; CHECK-NEXT: [[TMP52:%.*]] = insertelement <4 x i16> [[TMP26]], i16 [[TMP51]], i64 1
|
||||
; CHECK-NEXT: [[TMP53:%.*]] = extractelement <4 x i16> [[X]], i64 2
|
||||
@ -1624,8 +1624,8 @@ define amdgpu_kernel void @srem_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %x
|
||||
; CHECK-NEXT: [[TMP72:%.*]] = add i32 [[TMP67]], [[TMP71]]
|
||||
; CHECK-NEXT: [[TMP73:%.*]] = mul i32 [[TMP72]], [[TMP56]]
|
||||
; CHECK-NEXT: [[TMP74:%.*]] = sub i32 [[TMP55]], [[TMP73]]
|
||||
; CHECK-NEXT: [[TMP75:%.*]] = trunc i32 [[TMP74]] to i16
|
||||
; CHECK-NEXT: [[TMP76:%.*]] = sext i16 [[TMP75]] to i32
|
||||
; CHECK-NEXT: [[TMP75:%.*]] = shl i32 [[TMP74]], 16
|
||||
; CHECK-NEXT: [[TMP76:%.*]] = ashr i32 [[TMP75]], 16
|
||||
; CHECK-NEXT: [[TMP77:%.*]] = trunc i32 [[TMP76]] to i16
|
||||
; CHECK-NEXT: [[TMP78:%.*]] = insertelement <4 x i16> [[TMP52]], i16 [[TMP77]], i64 2
|
||||
; CHECK-NEXT: [[TMP79:%.*]] = extractelement <4 x i16> [[X]], i64 3
|
||||
@ -1650,8 +1650,8 @@ define amdgpu_kernel void @srem_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %x
|
||||
; CHECK-NEXT: [[TMP98:%.*]] = add i32 [[TMP93]], [[TMP97]]
|
||||
; CHECK-NEXT: [[TMP99:%.*]] = mul i32 [[TMP98]], [[TMP82]]
|
||||
; CHECK-NEXT: [[TMP100:%.*]] = sub i32 [[TMP81]], [[TMP99]]
|
||||
; CHECK-NEXT: [[TMP101:%.*]] = trunc i32 [[TMP100]] to i16
|
||||
; CHECK-NEXT: [[TMP102:%.*]] = sext i16 [[TMP101]] to i32
|
||||
; CHECK-NEXT: [[TMP101:%.*]] = shl i32 [[TMP100]], 16
|
||||
; CHECK-NEXT: [[TMP102:%.*]] = ashr i32 [[TMP101]], 16
|
||||
; CHECK-NEXT: [[TMP103:%.*]] = trunc i32 [[TMP102]] to i16
|
||||
; CHECK-NEXT: [[TMP104:%.*]] = insertelement <4 x i16> [[TMP78]], i16 [[TMP103]], i64 3
|
||||
; CHECK-NEXT: store <4 x i16> [[TMP104]], <4 x i16> addrspace(1)* [[OUT:%.*]]
|
||||
@ -1738,8 +1738,8 @@ define amdgpu_kernel void @sdiv_i3(i3 addrspace(1)* %out, i3 %x, i3 %y) {
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = fcmp fast oge float [[TMP14]], [[TMP15]]
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP5]], i32 0
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[TMP13]], [[TMP17]]
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = trunc i32 [[TMP18]] to i3
|
||||
; CHECK-NEXT: [[TMP20:%.*]] = sext i3 [[TMP19]] to i32
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = shl i32 [[TMP18]], 29
|
||||
; CHECK-NEXT: [[TMP20:%.*]] = ashr i32 [[TMP19]], 29
|
||||
; CHECK-NEXT: [[TMP21:%.*]] = trunc i32 [[TMP20]] to i3
|
||||
; CHECK-NEXT: store i3 [[TMP21]], i3 addrspace(1)* [[OUT:%.*]]
|
||||
; CHECK-NEXT: ret void
|
||||
@ -1771,8 +1771,8 @@ define amdgpu_kernel void @srem_i3(i3 addrspace(1)* %out, i3 %x, i3 %y) {
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[TMP13]], [[TMP17]]
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = mul i32 [[TMP18]], [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP20:%.*]] = sub i32 [[TMP1]], [[TMP19]]
|
||||
; CHECK-NEXT: [[TMP21:%.*]] = trunc i32 [[TMP20]] to i3
|
||||
; CHECK-NEXT: [[TMP22:%.*]] = sext i3 [[TMP21]] to i32
|
||||
; CHECK-NEXT: [[TMP21:%.*]] = shl i32 [[TMP20]], 29
|
||||
; CHECK-NEXT: [[TMP22:%.*]] = ashr i32 [[TMP21]], 29
|
||||
; CHECK-NEXT: [[TMP23:%.*]] = trunc i32 [[TMP22]] to i3
|
||||
; CHECK-NEXT: store i3 [[TMP23]], i3 addrspace(1)* [[OUT:%.*]]
|
||||
; CHECK-NEXT: ret void
|
||||
@ -1950,8 +1950,8 @@ define amdgpu_kernel void @sdiv_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %x
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = fcmp fast oge float [[TMP16]], [[TMP17]]
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = select i1 [[TMP18]], i32 [[TMP7]], i32 0
|
||||
; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[TMP15]], [[TMP19]]
|
||||
; CHECK-NEXT: [[TMP21:%.*]] = trunc i32 [[TMP20]] to i16
|
||||
; CHECK-NEXT: [[TMP22:%.*]] = sext i16 [[TMP21]] to i32
|
||||
; CHECK-NEXT: [[TMP21:%.*]] = shl i32 [[TMP20]], 16
|
||||
; CHECK-NEXT: [[TMP22:%.*]] = ashr i32 [[TMP21]], 16
|
||||
; CHECK-NEXT: [[TMP23:%.*]] = trunc i32 [[TMP22]] to i16
|
||||
; CHECK-NEXT: [[TMP24:%.*]] = insertelement <3 x i16> undef, i16 [[TMP23]], i64 0
|
||||
; CHECK-NEXT: [[TMP25:%.*]] = extractelement <3 x i16> [[X]], i64 1
|
||||
@ -1974,8 +1974,8 @@ define amdgpu_kernel void @sdiv_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %x
|
||||
; CHECK-NEXT: [[TMP42:%.*]] = fcmp fast oge float [[TMP40]], [[TMP41]]
|
||||
; CHECK-NEXT: [[TMP43:%.*]] = select i1 [[TMP42]], i32 [[TMP31]], i32 0
|
||||
; CHECK-NEXT: [[TMP44:%.*]] = add i32 [[TMP39]], [[TMP43]]
|
||||
; CHECK-NEXT: [[TMP45:%.*]] = trunc i32 [[TMP44]] to i16
|
||||
; CHECK-NEXT: [[TMP46:%.*]] = sext i16 [[TMP45]] to i32
|
||||
; CHECK-NEXT: [[TMP45:%.*]] = shl i32 [[TMP44]], 16
|
||||
; CHECK-NEXT: [[TMP46:%.*]] = ashr i32 [[TMP45]], 16
|
||||
; CHECK-NEXT: [[TMP47:%.*]] = trunc i32 [[TMP46]] to i16
|
||||
; CHECK-NEXT: [[TMP48:%.*]] = insertelement <3 x i16> [[TMP24]], i16 [[TMP47]], i64 1
|
||||
; CHECK-NEXT: [[TMP49:%.*]] = extractelement <3 x i16> [[X]], i64 2
|
||||
@ -1998,8 +1998,8 @@ define amdgpu_kernel void @sdiv_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %x
|
||||
; CHECK-NEXT: [[TMP66:%.*]] = fcmp fast oge float [[TMP64]], [[TMP65]]
|
||||
; CHECK-NEXT: [[TMP67:%.*]] = select i1 [[TMP66]], i32 [[TMP55]], i32 0
|
||||
; CHECK-NEXT: [[TMP68:%.*]] = add i32 [[TMP63]], [[TMP67]]
|
||||
; CHECK-NEXT: [[TMP69:%.*]] = trunc i32 [[TMP68]] to i16
|
||||
; CHECK-NEXT: [[TMP70:%.*]] = sext i16 [[TMP69]] to i32
|
||||
; CHECK-NEXT: [[TMP69:%.*]] = shl i32 [[TMP68]], 16
|
||||
; CHECK-NEXT: [[TMP70:%.*]] = ashr i32 [[TMP69]], 16
|
||||
; CHECK-NEXT: [[TMP71:%.*]] = trunc i32 [[TMP70]] to i16
|
||||
; CHECK-NEXT: [[TMP72:%.*]] = insertelement <3 x i16> [[TMP48]], i16 [[TMP71]], i64 2
|
||||
; CHECK-NEXT: store <3 x i16> [[TMP72]], <3 x i16> addrspace(1)* [[OUT:%.*]]
|
||||
@ -2034,8 +2034,8 @@ define amdgpu_kernel void @srem_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %x
|
||||
; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[TMP15]], [[TMP19]]
|
||||
; CHECK-NEXT: [[TMP21:%.*]] = mul i32 [[TMP20]], [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP22:%.*]] = sub i32 [[TMP3]], [[TMP21]]
|
||||
; CHECK-NEXT: [[TMP23:%.*]] = trunc i32 [[TMP22]] to i16
|
||||
; CHECK-NEXT: [[TMP24:%.*]] = sext i16 [[TMP23]] to i32
|
||||
; CHECK-NEXT: [[TMP23:%.*]] = shl i32 [[TMP22]], 16
|
||||
; CHECK-NEXT: [[TMP24:%.*]] = ashr i32 [[TMP23]], 16
|
||||
; CHECK-NEXT: [[TMP25:%.*]] = trunc i32 [[TMP24]] to i16
|
||||
; CHECK-NEXT: [[TMP26:%.*]] = insertelement <3 x i16> undef, i16 [[TMP25]], i64 0
|
||||
; CHECK-NEXT: [[TMP27:%.*]] = extractelement <3 x i16> [[X]], i64 1
|
||||
@ -2060,8 +2060,8 @@ define amdgpu_kernel void @srem_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %x
|
||||
; CHECK-NEXT: [[TMP46:%.*]] = add i32 [[TMP41]], [[TMP45]]
|
||||
; CHECK-NEXT: [[TMP47:%.*]] = mul i32 [[TMP46]], [[TMP30]]
|
||||
; CHECK-NEXT: [[TMP48:%.*]] = sub i32 [[TMP29]], [[TMP47]]
|
||||
; CHECK-NEXT: [[TMP49:%.*]] = trunc i32 [[TMP48]] to i16
|
||||
; CHECK-NEXT: [[TMP50:%.*]] = sext i16 [[TMP49]] to i32
|
||||
; CHECK-NEXT: [[TMP49:%.*]] = shl i32 [[TMP48]], 16
|
||||
; CHECK-NEXT: [[TMP50:%.*]] = ashr i32 [[TMP49]], 16
|
||||
; CHECK-NEXT: [[TMP51:%.*]] = trunc i32 [[TMP50]] to i16
|
||||
; CHECK-NEXT: [[TMP52:%.*]] = insertelement <3 x i16> [[TMP26]], i16 [[TMP51]], i64 1
|
||||
; CHECK-NEXT: [[TMP53:%.*]] = extractelement <3 x i16> [[X]], i64 2
|
||||
@ -2086,8 +2086,8 @@ define amdgpu_kernel void @srem_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %x
|
||||
; CHECK-NEXT: [[TMP72:%.*]] = add i32 [[TMP67]], [[TMP71]]
|
||||
; CHECK-NEXT: [[TMP73:%.*]] = mul i32 [[TMP72]], [[TMP56]]
|
||||
; CHECK-NEXT: [[TMP74:%.*]] = sub i32 [[TMP55]], [[TMP73]]
|
||||
; CHECK-NEXT: [[TMP75:%.*]] = trunc i32 [[TMP74]] to i16
|
||||
; CHECK-NEXT: [[TMP76:%.*]] = sext i16 [[TMP75]] to i32
|
||||
; CHECK-NEXT: [[TMP75:%.*]] = shl i32 [[TMP74]], 16
|
||||
; CHECK-NEXT: [[TMP76:%.*]] = ashr i32 [[TMP75]], 16
|
||||
; CHECK-NEXT: [[TMP77:%.*]] = trunc i32 [[TMP76]] to i16
|
||||
; CHECK-NEXT: [[TMP78:%.*]] = insertelement <3 x i16> [[TMP52]], i16 [[TMP77]], i64 2
|
||||
; CHECK-NEXT: store <3 x i16> [[TMP78]], <3 x i16> addrspace(1)* [[OUT:%.*]]
|
||||
@ -2266,8 +2266,8 @@ define amdgpu_kernel void @sdiv_v3i15(<3 x i15> addrspace(1)* %out, <3 x i15> %x
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = fcmp fast oge float [[TMP16]], [[TMP17]]
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = select i1 [[TMP18]], i32 [[TMP7]], i32 0
|
||||
; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[TMP15]], [[TMP19]]
|
||||
; CHECK-NEXT: [[TMP21:%.*]] = trunc i32 [[TMP20]] to i15
|
||||
; CHECK-NEXT: [[TMP22:%.*]] = sext i15 [[TMP21]] to i32
|
||||
; CHECK-NEXT: [[TMP21:%.*]] = shl i32 [[TMP20]], 17
|
||||
; CHECK-NEXT: [[TMP22:%.*]] = ashr i32 [[TMP21]], 17
|
||||
; CHECK-NEXT: [[TMP23:%.*]] = trunc i32 [[TMP22]] to i15
|
||||
; CHECK-NEXT: [[TMP24:%.*]] = insertelement <3 x i15> undef, i15 [[TMP23]], i64 0
|
||||
; CHECK-NEXT: [[TMP25:%.*]] = extractelement <3 x i15> [[X]], i64 1
|
||||
@ -2290,8 +2290,8 @@ define amdgpu_kernel void @sdiv_v3i15(<3 x i15> addrspace(1)* %out, <3 x i15> %x
|
||||
; CHECK-NEXT: [[TMP42:%.*]] = fcmp fast oge float [[TMP40]], [[TMP41]]
|
||||
; CHECK-NEXT: [[TMP43:%.*]] = select i1 [[TMP42]], i32 [[TMP31]], i32 0
|
||||
; CHECK-NEXT: [[TMP44:%.*]] = add i32 [[TMP39]], [[TMP43]]
|
||||
; CHECK-NEXT: [[TMP45:%.*]] = trunc i32 [[TMP44]] to i15
|
||||
; CHECK-NEXT: [[TMP46:%.*]] = sext i15 [[TMP45]] to i32
|
||||
; CHECK-NEXT: [[TMP45:%.*]] = shl i32 [[TMP44]], 17
|
||||
; CHECK-NEXT: [[TMP46:%.*]] = ashr i32 [[TMP45]], 17
|
||||
; CHECK-NEXT: [[TMP47:%.*]] = trunc i32 [[TMP46]] to i15
|
||||
; CHECK-NEXT: [[TMP48:%.*]] = insertelement <3 x i15> [[TMP24]], i15 [[TMP47]], i64 1
|
||||
; CHECK-NEXT: [[TMP49:%.*]] = extractelement <3 x i15> [[X]], i64 2
|
||||
@ -2314,8 +2314,8 @@ define amdgpu_kernel void @sdiv_v3i15(<3 x i15> addrspace(1)* %out, <3 x i15> %x
|
||||
; CHECK-NEXT: [[TMP66:%.*]] = fcmp fast oge float [[TMP64]], [[TMP65]]
|
||||
; CHECK-NEXT: [[TMP67:%.*]] = select i1 [[TMP66]], i32 [[TMP55]], i32 0
|
||||
; CHECK-NEXT: [[TMP68:%.*]] = add i32 [[TMP63]], [[TMP67]]
|
||||
; CHECK-NEXT: [[TMP69:%.*]] = trunc i32 [[TMP68]] to i15
|
||||
; CHECK-NEXT: [[TMP70:%.*]] = sext i15 [[TMP69]] to i32
|
||||
; CHECK-NEXT: [[TMP69:%.*]] = shl i32 [[TMP68]], 17
|
||||
; CHECK-NEXT: [[TMP70:%.*]] = ashr i32 [[TMP69]], 17
|
||||
; CHECK-NEXT: [[TMP71:%.*]] = trunc i32 [[TMP70]] to i15
|
||||
; CHECK-NEXT: [[TMP72:%.*]] = insertelement <3 x i15> [[TMP48]], i15 [[TMP71]], i64 2
|
||||
; CHECK-NEXT: store <3 x i15> [[TMP72]], <3 x i15> addrspace(1)* [[OUT:%.*]]
|
||||
@ -2350,8 +2350,8 @@ define amdgpu_kernel void @srem_v3i15(<3 x i15> addrspace(1)* %out, <3 x i15> %x
|
||||
; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[TMP15]], [[TMP19]]
|
||||
; CHECK-NEXT: [[TMP21:%.*]] = mul i32 [[TMP20]], [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP22:%.*]] = sub i32 [[TMP3]], [[TMP21]]
|
||||
; CHECK-NEXT: [[TMP23:%.*]] = trunc i32 [[TMP22]] to i15
|
||||
; CHECK-NEXT: [[TMP24:%.*]] = sext i15 [[TMP23]] to i32
|
||||
; CHECK-NEXT: [[TMP23:%.*]] = shl i32 [[TMP22]], 17
|
||||
; CHECK-NEXT: [[TMP24:%.*]] = ashr i32 [[TMP23]], 17
|
||||
; CHECK-NEXT: [[TMP25:%.*]] = trunc i32 [[TMP24]] to i15
|
||||
; CHECK-NEXT: [[TMP26:%.*]] = insertelement <3 x i15> undef, i15 [[TMP25]], i64 0
|
||||
; CHECK-NEXT: [[TMP27:%.*]] = extractelement <3 x i15> [[X]], i64 1
|
||||
@ -2376,8 +2376,8 @@ define amdgpu_kernel void @srem_v3i15(<3 x i15> addrspace(1)* %out, <3 x i15> %x
|
||||
; CHECK-NEXT: [[TMP46:%.*]] = add i32 [[TMP41]], [[TMP45]]
|
||||
; CHECK-NEXT: [[TMP47:%.*]] = mul i32 [[TMP46]], [[TMP30]]
|
||||
; CHECK-NEXT: [[TMP48:%.*]] = sub i32 [[TMP29]], [[TMP47]]
|
||||
; CHECK-NEXT: [[TMP49:%.*]] = trunc i32 [[TMP48]] to i15
|
||||
; CHECK-NEXT: [[TMP50:%.*]] = sext i15 [[TMP49]] to i32
|
||||
; CHECK-NEXT: [[TMP49:%.*]] = shl i32 [[TMP48]], 17
|
||||
; CHECK-NEXT: [[TMP50:%.*]] = ashr i32 [[TMP49]], 17
|
||||
; CHECK-NEXT: [[TMP51:%.*]] = trunc i32 [[TMP50]] to i15
|
||||
; CHECK-NEXT: [[TMP52:%.*]] = insertelement <3 x i15> [[TMP26]], i15 [[TMP51]], i64 1
|
||||
; CHECK-NEXT: [[TMP53:%.*]] = extractelement <3 x i15> [[X]], i64 2
|
||||
@ -2402,8 +2402,8 @@ define amdgpu_kernel void @srem_v3i15(<3 x i15> addrspace(1)* %out, <3 x i15> %x
|
||||
; CHECK-NEXT: [[TMP72:%.*]] = add i32 [[TMP67]], [[TMP71]]
|
||||
; CHECK-NEXT: [[TMP73:%.*]] = mul i32 [[TMP72]], [[TMP56]]
|
||||
; CHECK-NEXT: [[TMP74:%.*]] = sub i32 [[TMP55]], [[TMP73]]
|
||||
; CHECK-NEXT: [[TMP75:%.*]] = trunc i32 [[TMP74]] to i15
|
||||
; CHECK-NEXT: [[TMP76:%.*]] = sext i15 [[TMP75]] to i32
|
||||
; CHECK-NEXT: [[TMP75:%.*]] = shl i32 [[TMP74]], 17
|
||||
; CHECK-NEXT: [[TMP76:%.*]] = ashr i32 [[TMP75]], 17
|
||||
; CHECK-NEXT: [[TMP77:%.*]] = trunc i32 [[TMP76]] to i15
|
||||
; CHECK-NEXT: [[TMP78:%.*]] = insertelement <3 x i15> [[TMP52]], i15 [[TMP77]], i64 2
|
||||
; CHECK-NEXT: store <3 x i15> [[TMP78]], <3 x i15> addrspace(1)* [[OUT:%.*]]
|
||||
|
Loading…
Reference in New Issue
Block a user