1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-26 04:32:44 +01:00
llvm-mirror/test/CodeGen/AMDGPU/cgp-bitfield-extract.ll
Sanjay Patel 8720d89aac [DAGCombiner] re-enable truncation of binops
This is effectively re-committing the changes from:
rL347917 (D54640)
rL348195 (D55126)
...which were effectively reverted here:
rL348604
...because the code had a bug that could induce infinite looping
or eventual out-of-memory compilation.

The bug was that this code did not guard against transforming
opaque constants. More details are in the post-commit mailing
list thread for r347917. A reduced test for that is included
in the x86 bool-math.ll file. (I wasn't able to reduce a PPC
backend test for this, but it was almost the same pattern.)

Original commit message for r347917:

The motivating case for this is shown in:
https://bugs.llvm.org/show_bug.cgi?id=32023
and the corresponding rot16.ll regression tests.

Because x86 scalar shift amounts are i8 values, we can end up with trunc-binop-trunc
sequences that don't get folded in IR.

As the TODO comments suggest, there will be regressions if we extend this (for x86,
we mostly seem to be missing LEA opportunities, but there are likely vector folds
missing too). I think those should be considered existing bugs because this is the
same transform that we do as an IR canonicalization in instcombine. We just need
more tests to make those visible independent of this patch.

llvm-svn: 348706
2018-12-08 16:07:38 +00:00

307 lines
6.5 KiB
LLVM

; RUN: opt -S -mtriple=amdgcn-- -codegenprepare < %s | FileCheck -check-prefix=OPT %s
; RUN: opt -S -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -codegenprepare < %s | FileCheck -check-prefix=OPT %s
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; This particular case will actually be worse in terms of code size
; after sinking the shift into both successor blocks.
; OPT-LABEL: @sink_ubfe_i32(
; OPT: entry:
; OPT-NEXT: br i1
; OPT: bb0:
; OPT: %0 = lshr i32 %arg1, 8
; OPT-NEXT: %val0 = and i32 %0, 255
; OPT: br label
; OPT: bb1:
; OPT: %1 = lshr i32 %arg1, 8
; OPT-NEXT: %val1 = and i32 %1, 127
; OPT: br label
; OPT: ret:
; OPT: store
; OPT: ret
; GCN-LABEL: {{^}}sink_ubfe_i32:
; GCN-NOT: lshr
; GCN: s_cbranch_scc1
; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80008
; GCN: BB0_2:
; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x70008
; GCN: BB0_3:
; GCN: buffer_store_dword
; GCN: s_endpgm
; Test kernel: a 32-bit logical shift right is computed in the entry block,
; while the two masks that consume it live in separate successor blocks.
;   %out  - addrspace(1) pointer that receives the selected masked value
;   %arg1 - 32-bit value that is shifted and masked
define amdgpu_kernel void @sink_ubfe_i32(i32 addrspace(1)* %out, i32 %arg1) #0 {
entry:
; The shift has no user in this block; both users are in bb0 and bb1.
%shr = lshr i32 %arg1, 8
; The condition is undef, so either successor may be taken.
br i1 undef, label %bb0, label %bb1
bb0:
; Mask 255 keeps the low 8 bits of the shifted value.
%val0 = and i32 %shr, 255
; NOTE(review): presumably this volatile store only gives the block a side
; effect so it is not simplified away - confirm.
store volatile i32 0, i32 addrspace(1)* undef
br label %ret
bb1:
; Mask 127 keeps the low 7 bits of the shifted value.
%val1 = and i32 %shr, 127
store volatile i32 0, i32 addrspace(1)* undef
br label %ret
ret:
; Select whichever masked value was produced on the path taken and store it.
%phi = phi i32 [ %val0, %bb0 ], [ %val1, %bb1 ]
store i32 %phi, i32 addrspace(1)* %out
ret void
}
; OPT-LABEL: @sink_sbfe_i32(
; OPT: entry:
; OPT-NEXT: br i1
; OPT: bb0:
; OPT: %0 = ashr i32 %arg1, 8
; OPT-NEXT: %val0 = and i32 %0, 255
; OPT: br label
; OPT: bb1:
; OPT: %1 = ashr i32 %arg1, 8
; OPT-NEXT: %val1 = and i32 %1, 127
; OPT: br label
; OPT: ret:
; OPT: store
; OPT: ret
; GCN-LABEL: {{^}}sink_sbfe_i32:
; Test kernel: same shape as the logical-shift 32-bit case, but the shift in
; the entry block is an arithmetic shift right.
;   %out  - addrspace(1) pointer that receives the selected masked value
;   %arg1 - 32-bit value that is shifted and masked
define amdgpu_kernel void @sink_sbfe_i32(i32 addrspace(1)* %out, i32 %arg1) #0 {
entry:
; The shift has no user in this block; both users are in bb0 and bb1.
%shr = ashr i32 %arg1, 8
; The condition is undef, so either successor may be taken.
br i1 undef, label %bb0, label %bb1
bb0:
; Mask 255 keeps the low 8 bits of the shifted value; the sign-filled upper
; bits produced by the arithmetic shift are cleared by the mask.
%val0 = and i32 %shr, 255
; NOTE(review): presumably this volatile store only gives the block a side
; effect so it is not simplified away - confirm.
store volatile i32 0, i32 addrspace(1)* undef
br label %ret
bb1:
; Mask 127 keeps the low 7 bits of the shifted value.
%val1 = and i32 %shr, 127
store volatile i32 0, i32 addrspace(1)* undef
br label %ret
ret:
; Select whichever masked value was produced on the path taken and store it.
%phi = phi i32 [ %val0, %bb0 ], [ %val1, %bb1 ]
store i32 %phi, i32 addrspace(1)* %out
ret void
}
; OPT-LABEL: @sink_ubfe_i16(
; OPT: entry:
; OPT-NEXT: br i1
; OPT: bb0:
; OPT: %0 = lshr i16 %arg1, 4
; OPT-NEXT: %val0 = and i16 %0, 255
; OPT: br label
; OPT: bb1:
; OPT: %1 = lshr i16 %arg1, 4
; OPT-NEXT: %val1 = and i16 %1, 127
; OPT: br label
; OPT: ret:
; OPT: store
; OPT: ret
; For GFX8: since i16 is a legal type, we cannot sink lshr into BBs.
; GCN-LABEL: {{^}}sink_ubfe_i16:
; GCN-NOT: lshr
; VI: s_load_dword [[ARG:s[0-9]+]], s[0:1], 0x2c
; VI: s_bfe_u32 [[BFE:s[0-9]+]], [[ARG]], 0xc0004
; GCN: s_cbranch_scc1
; SI: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80004
; VI: v_mov_b32_e32 v{{[0-9]+}}, 0xff
; GCN: BB2_2:
; SI: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x70004
; VI: v_mov_b32_e32 v{{[0-9]+}}, 0x7f
; GCN: BB2_3:
; GCN: buffer_store_short
; GCN: s_endpgm
; Test kernel: 16-bit variant. A logical shift right by 4 is computed in the
; entry block and masked in two separate successor blocks.
;   %out  - addrspace(1) pointer that receives the selected masked value
;   %arg1 - 16-bit value that is shifted and masked
define amdgpu_kernel void @sink_ubfe_i16(i16 addrspace(1)* %out, i16 %arg1) #0 {
entry:
; The shift has no user in this block; both users are in bb0 and bb1.
%shr = lshr i16 %arg1, 4
; The condition is undef, so either successor may be taken.
br i1 undef, label %bb0, label %bb1
bb0:
; Mask 255 keeps the low 8 bits of the shifted value.
%val0 = and i16 %shr, 255
; NOTE(review): presumably this volatile store only gives the block a side
; effect so it is not simplified away - confirm.
store volatile i16 0, i16 addrspace(1)* undef
br label %ret
bb1:
; Mask 127 keeps the low 7 bits of the shifted value.
%val1 = and i16 %shr, 127
store volatile i16 0, i16 addrspace(1)* undef
br label %ret
ret:
; Select whichever masked value was produced on the path taken and store it.
%phi = phi i16 [ %val0, %bb0 ], [ %val1, %bb1 ]
store i16 %phi, i16 addrspace(1)* %out
ret void
}
; We don't really want to sink this one since it isn't reducible to a
; 32-bit BFE on one half of the integer.
; OPT-LABEL: @sink_ubfe_i64_span_midpoint(
; OPT: entry:
; OPT-NOT: lshr
; OPT: br i1
; OPT: bb0:
; OPT: %0 = lshr i64 %arg1, 30
; OPT-NEXT: %val0 = and i64 %0, 255
; OPT: bb1:
; OPT: %1 = lshr i64 %arg1, 30
; OPT-NEXT: %val1 = and i64 %1, 127
; OPT: ret:
; OPT: store
; OPT: ret
; GCN-LABEL: {{^}}sink_ubfe_i64_span_midpoint:
; GCN: v_alignbit_b32 v[[LO:[0-9]+]], s{{[0-9]+}}, v{{[0-9]+}}, 30
; GCN: s_cbranch_scc1 BB3_2
; GCN: v_and_b32_e32 v{{[0-9]+}}, 0xff, v[[LO]]
; GCN: BB3_2:
; GCN: v_and_b32_e32 v{{[0-9]+}}, 0x7f, v[[LO]]
; GCN: BB3_3:
; GCN: buffer_store_dwordx2
; Test kernel: 64-bit variant where the extracted field straddles the 32-bit
; midpoint. With a shift of 30, the 8-bit mask selects source bits 30..37 and
; the 7-bit mask selects source bits 30..36.
;   %out  - addrspace(1) pointer that receives the selected masked value
;   %arg1 - 64-bit value that is shifted and masked
define amdgpu_kernel void @sink_ubfe_i64_span_midpoint(i64 addrspace(1)* %out, i64 %arg1) #0 {
entry:
; The shift has no user in this block; both users are in bb0 and bb1.
%shr = lshr i64 %arg1, 30
; The condition is undef, so either successor may be taken.
br i1 undef, label %bb0, label %bb1
bb0:
; Mask 255 keeps the low 8 bits of the shifted value.
%val0 = and i64 %shr, 255
; The volatile store here is 32 bits wide even though the data path is i64.
; NOTE(review): presumably it only gives the block a side effect so it is not
; simplified away - confirm.
store volatile i32 0, i32 addrspace(1)* undef
br label %ret
bb1:
; Mask 127 keeps the low 7 bits of the shifted value.
%val1 = and i64 %shr, 127
store volatile i32 0, i32 addrspace(1)* undef
br label %ret
ret:
; Select whichever masked value was produced on the path taken and store it.
%phi = phi i64 [ %val0, %bb0 ], [ %val1, %bb1 ]
store i64 %phi, i64 addrspace(1)* %out
ret void
}
; OPT-LABEL: @sink_ubfe_i64_low32(
; OPT: entry:
; OPT-NOT: lshr
; OPT: br i1
; OPT: bb0:
; OPT: %0 = lshr i64 %arg1, 15
; OPT-NEXT: %val0 = and i64 %0, 255
; OPT: bb1:
; OPT: %1 = lshr i64 %arg1, 15
; OPT-NEXT: %val1 = and i64 %1, 127
; OPT: ret:
; OPT: store
; OPT: ret
; GCN-LABEL: {{^}}sink_ubfe_i64_low32:
; GCN: s_cbranch_scc1 BB4_2
; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x8000f
; GCN: BB4_2:
; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x7000f
; GCN: BB4_3:
; GCN: buffer_store_dwordx2
; Test kernel: 64-bit variant where the extracted field lies entirely in the
; low 32 bits. With a shift of 15, the 8-bit mask selects source bits 15..22
; and the 7-bit mask selects source bits 15..21.
;   %out  - addrspace(1) pointer that receives the selected masked value
;   %arg1 - 64-bit value that is shifted and masked
define amdgpu_kernel void @sink_ubfe_i64_low32(i64 addrspace(1)* %out, i64 %arg1) #0 {
entry:
; The shift has no user in this block; both users are in bb0 and bb1.
%shr = lshr i64 %arg1, 15
; The condition is undef, so either successor may be taken.
br i1 undef, label %bb0, label %bb1
bb0:
; Mask 255 keeps the low 8 bits of the shifted value.
%val0 = and i64 %shr, 255
; The volatile store here is 32 bits wide even though the data path is i64.
; NOTE(review): presumably it only gives the block a side effect so it is not
; simplified away - confirm.
store volatile i32 0, i32 addrspace(1)* undef
br label %ret
bb1:
; Mask 127 keeps the low 7 bits of the shifted value.
%val1 = and i64 %shr, 127
store volatile i32 0, i32 addrspace(1)* undef
br label %ret
ret:
; Select whichever masked value was produced on the path taken and store it.
%phi = phi i64 [ %val0, %bb0 ], [ %val1, %bb1 ]
store i64 %phi, i64 addrspace(1)* %out
ret void
}
; OPT-LABEL: @sink_ubfe_i64_high32(
; OPT: entry:
; OPT-NOT: lshr
; OPT: br i1
; OPT: bb0:
; OPT: %0 = lshr i64 %arg1, 35
; OPT-NEXT: %val0 = and i64 %0, 255
; OPT: bb1:
; OPT: %1 = lshr i64 %arg1, 35
; OPT-NEXT: %val1 = and i64 %1, 127
; OPT: ret:
; OPT: store
; OPT: ret
; GCN-LABEL: {{^}}sink_ubfe_i64_high32:
; GCN: s_cbranch_scc1 BB5_2
; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80003
; GCN: BB5_2:
; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x70003
; GCN: BB5_3:
; GCN: buffer_store_dwordx2
; Test kernel: 64-bit variant where the extracted field lies entirely in the
; high 32 bits. With a shift of 35, the 8-bit mask selects source bits 35..42
; and the 7-bit mask selects source bits 35..41 (bit 3 upward of the high
; half).
;   %out  - addrspace(1) pointer that receives the selected masked value
;   %arg1 - 64-bit value that is shifted and masked
define amdgpu_kernel void @sink_ubfe_i64_high32(i64 addrspace(1)* %out, i64 %arg1) #0 {
entry:
; The shift has no user in this block; both users are in bb0 and bb1.
%shr = lshr i64 %arg1, 35
; The condition is undef, so either successor may be taken.
br i1 undef, label %bb0, label %bb1
bb0:
; Mask 255 keeps the low 8 bits of the shifted value.
%val0 = and i64 %shr, 255
; The volatile store here is 32 bits wide even though the data path is i64.
; NOTE(review): presumably it only gives the block a side effect so it is not
; simplified away - confirm.
store volatile i32 0, i32 addrspace(1)* undef
br label %ret
bb1:
; Mask 127 keeps the low 7 bits of the shifted value.
%val1 = and i64 %shr, 127
store volatile i32 0, i32 addrspace(1)* undef
br label %ret
ret:
; Select whichever masked value was produced on the path taken and store it.
%phi = phi i64 [ %val0, %bb0 ], [ %val1, %bb1 ]
store i64 %phi, i64 addrspace(1)* %out
ret void
}
; Attribute group #0, referenced by the kernel definitions above: marks them
; nounwind.
attributes #0 = { nounwind }