1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 19:12:56 +02:00

AMDGPU/GlobalISel: Fix shift legalization for non-power-of-2

clampScalar doesn't do anything for non-power-of-2 in range.
There should probably be a combination rule to reduce the number
of matching rules.

llvm-svn: 353526
This commit is contained in:
Matt Arsenault 2019-02-08 15:06:24 +00:00
parent bdac3185da
commit 75b6905f36
2 changed files with 87 additions and 0 deletions

View File

@ -452,12 +452,14 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST,
Shifts.clampScalar(1, S16, S32);
Shifts.clampScalar(0, S16, S64);
Shifts.widenScalarToNextPow2(0, 16);
} else {
// Make sure we legalize the shift amount type first, as the general
// expansion for the shifted type will produce much worse code if it hasn't
// been truncated already.
Shifts.clampScalar(1, S32, S32);
Shifts.clampScalar(0, S32, S64);
Shifts.widenScalarToNextPow2(0, 32);
}
Shifts.scalarize(0);

View File

@ -292,6 +292,7 @@ body: |
...
---
name: test_shl_v2s32_v2s32
body: |
bb.0:
@ -674,6 +675,90 @@ body: |
$vgpr0_vgpr1 = COPY %2
...
---
name: test_shl_s7_s7
body: |
bb.0:
liveins: $vgpr0, $vgpr1
; SI-LABEL: name: test_shl_s7_s7
; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127
; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[AND]](s32)
; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SHL]](s32)
; SI: $vgpr0 = COPY [[COPY4]](s32)
; VI-LABEL: name: test_shl_s7_s7
; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127
; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32)
; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]]
; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[AND]](s32)
; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC]](s16)
; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL]](s16)
; VI: $vgpr0 = COPY [[ANYEXT]](s32)
; GFX9-LABEL: name: test_shl_s7_s7
; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127
; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32)
; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]]
; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[AND]](s32)
; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC]](s16)
; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL]](s16)
; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s7) = G_TRUNC %0
%3:_(s7) = G_TRUNC %1
%4:_(s7) = G_SHL %2, %3
%5:_(s32) = G_ANYEXT %4
$vgpr0 = COPY %5
...
---
name: test_shl_i24_i32
body: |
bb.0:
liveins: $vgpr0, $vgpr1
; SI-LABEL: name: test_shl_i24_i32
; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[COPY1]](s32)
; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[SHL]](s32)
; SI: $vgpr0 = COPY [[COPY3]](s32)
; VI-LABEL: name: test_shl_i24_i32
; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[COPY1]](s32)
; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[SHL]](s32)
; VI: $vgpr0 = COPY [[COPY3]](s32)
; GFX9-LABEL: name: test_shl_i24_i32
; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[COPY1]](s32)
; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[SHL]](s32)
; GFX9: $vgpr0 = COPY [[COPY3]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s24) = G_TRUNC %0
%3:_(s24) = G_SHL %2, %1
%4:_(s32) = G_ANYEXT %3
$vgpr0 = COPY %4
...
---
name: test_shl_s128_s128
body: |