mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 11:13:28 +01:00
AMDGPU/GlobalISel: Enable s_{and|or}n2_{b32|b64} patterns
This commit is contained in:
parent
7abe7a3b10
commit
c97959510d
@@ -408,8 +408,14 @@ class SOP2_64_32_32 <string opName, list<dag> pattern=[]> : SOP2_Pseudo <
|
|||||||
class UniformUnaryFrag<SDPatternOperator Op> : PatFrag <
|
class UniformUnaryFrag<SDPatternOperator Op> : PatFrag <
|
||||||
(ops node:$src0),
|
(ops node:$src0),
|
||||||
(Op $src0),
|
(Op $src0),
|
||||||
[{ return !N->isDivergent(); }]
|
[{ return !N->isDivergent(); }]> {
|
||||||
>;
|
// This check is unnecessary as it's captured by the result register
|
||||||
|
// bank constraint.
|
||||||
|
//
|
||||||
|
// FIXME: Should add a way for the emitter to recognize this is a
|
||||||
|
// trivially true predicate to eliminate the check.
|
||||||
|
let GISelPredicateCode = [{return true;}];
|
||||||
|
}
|
||||||
|
|
||||||
class UniformBinFrag<SDPatternOperator Op> : PatFrag <
|
class UniformBinFrag<SDPatternOperator Op> : PatFrag <
|
||||||
(ops node:$src0, node:$src1),
|
(ops node:$src0, node:$src1),
|
||||||
|
@@ -5,8 +5,7 @@
|
|||||||
define amdgpu_ps i32 @s_andn2_i32(i32 inreg %src0, i32 inreg %src1) {
|
define amdgpu_ps i32 @s_andn2_i32(i32 inreg %src0, i32 inreg %src1) {
|
||||||
; GCN-LABEL: s_andn2_i32:
|
; GCN-LABEL: s_andn2_i32:
|
||||||
; GCN: ; %bb.0:
|
; GCN: ; %bb.0:
|
||||||
; GCN-NEXT: s_not_b32 s0, s3
|
; GCN-NEXT: s_andn2_b32 s0, s2, s3
|
||||||
; GCN-NEXT: s_and_b32 s0, s2, s0
|
|
||||||
; GCN-NEXT: ; return to shader part epilog
|
; GCN-NEXT: ; return to shader part epilog
|
||||||
%not.src1 = xor i32 %src1, -1
|
%not.src1 = xor i32 %src1, -1
|
||||||
%and = and i32 %src0, %not.src1
|
%and = and i32 %src0, %not.src1
|
||||||
@@ -16,8 +15,7 @@ define amdgpu_ps i32 @s_andn2_i32(i32 inreg %src0, i32 inreg %src1) {
|
|||||||
define amdgpu_ps i32 @s_andn2_i32_commute(i32 inreg %src0, i32 inreg %src1) {
|
define amdgpu_ps i32 @s_andn2_i32_commute(i32 inreg %src0, i32 inreg %src1) {
|
||||||
; GCN-LABEL: s_andn2_i32_commute:
|
; GCN-LABEL: s_andn2_i32_commute:
|
||||||
; GCN: ; %bb.0:
|
; GCN: ; %bb.0:
|
||||||
; GCN-NEXT: s_not_b32 s0, s3
|
; GCN-NEXT: s_andn2_b32 s0, s2, s3
|
||||||
; GCN-NEXT: s_and_b32 s0, s0, s2
|
|
||||||
; GCN-NEXT: ; return to shader part epilog
|
; GCN-NEXT: ; return to shader part epilog
|
||||||
%not.src1 = xor i32 %src1, -1
|
%not.src1 = xor i32 %src1, -1
|
||||||
%and = and i32 %not.src1, %src0
|
%and = and i32 %not.src1, %src0
|
||||||
@@ -28,7 +26,7 @@ define amdgpu_ps { i32, i32 } @s_andn2_i32_multi_use(i32 inreg %src0, i32 inreg
|
|||||||
; GCN-LABEL: s_andn2_i32_multi_use:
|
; GCN-LABEL: s_andn2_i32_multi_use:
|
||||||
; GCN: ; %bb.0:
|
; GCN: ; %bb.0:
|
||||||
; GCN-NEXT: s_not_b32 s1, s3
|
; GCN-NEXT: s_not_b32 s1, s3
|
||||||
; GCN-NEXT: s_and_b32 s0, s2, s1
|
; GCN-NEXT: s_andn2_b32 s0, s2, s3
|
||||||
; GCN-NEXT: ; return to shader part epilog
|
; GCN-NEXT: ; return to shader part epilog
|
||||||
%not.src1 = xor i32 %src1, -1
|
%not.src1 = xor i32 %src1, -1
|
||||||
%and = and i32 %src0, %not.src1
|
%and = and i32 %src0, %not.src1
|
||||||
@@ -40,9 +38,8 @@ define amdgpu_ps { i32, i32 } @s_andn2_i32_multi_use(i32 inreg %src0, i32 inreg
|
|||||||
define amdgpu_ps { i32, i32 } @s_andn2_i32_multi_foldable_use(i32 inreg %src0, i32 inreg %src1, i32 inreg %src2) {
|
define amdgpu_ps { i32, i32 } @s_andn2_i32_multi_foldable_use(i32 inreg %src0, i32 inreg %src1, i32 inreg %src2) {
|
||||||
; GCN-LABEL: s_andn2_i32_multi_foldable_use:
|
; GCN-LABEL: s_andn2_i32_multi_foldable_use:
|
||||||
; GCN: ; %bb.0:
|
; GCN: ; %bb.0:
|
||||||
; GCN-NEXT: s_not_b32 s1, s4
|
; GCN-NEXT: s_andn2_b32 s0, s2, s4
|
||||||
; GCN-NEXT: s_and_b32 s0, s2, s1
|
; GCN-NEXT: s_andn2_b32 s1, s3, s4
|
||||||
; GCN-NEXT: s_and_b32 s1, s3, s1
|
|
||||||
; GCN-NEXT: ; return to shader part epilog
|
; GCN-NEXT: ; return to shader part epilog
|
||||||
%not.src2 = xor i32 %src2, -1
|
%not.src2 = xor i32 %src2, -1
|
||||||
%and0 = and i32 %src0, %not.src2
|
%and0 = and i32 %src0, %not.src2
|
||||||
@@ -91,8 +88,7 @@ define amdgpu_ps float @v_andn2_i32_vs(i32 %src0, i32 inreg %src1) {
|
|||||||
define amdgpu_ps i64 @s_andn2_i64(i64 inreg %src0, i64 inreg %src1) {
|
define amdgpu_ps i64 @s_andn2_i64(i64 inreg %src0, i64 inreg %src1) {
|
||||||
; GCN-LABEL: s_andn2_i64:
|
; GCN-LABEL: s_andn2_i64:
|
||||||
; GCN: ; %bb.0:
|
; GCN: ; %bb.0:
|
||||||
; GCN-NEXT: s_not_b64 s[0:1], s[4:5]
|
; GCN-NEXT: s_andn2_b64 s[0:1], s[2:3], s[4:5]
|
||||||
; GCN-NEXT: s_and_b64 s[0:1], s[2:3], s[0:1]
|
|
||||||
; GCN-NEXT: ; return to shader part epilog
|
; GCN-NEXT: ; return to shader part epilog
|
||||||
%not.src1 = xor i64 %src1, -1
|
%not.src1 = xor i64 %src1, -1
|
||||||
%and = and i64 %src0, %not.src1
|
%and = and i64 %src0, %not.src1
|
||||||
@@ -102,8 +98,7 @@ define amdgpu_ps i64 @s_andn2_i64(i64 inreg %src0, i64 inreg %src1) {
|
|||||||
define amdgpu_ps i64 @s_andn2_i64_commute(i64 inreg %src0, i64 inreg %src1) {
|
define amdgpu_ps i64 @s_andn2_i64_commute(i64 inreg %src0, i64 inreg %src1) {
|
||||||
; GCN-LABEL: s_andn2_i64_commute:
|
; GCN-LABEL: s_andn2_i64_commute:
|
||||||
; GCN: ; %bb.0:
|
; GCN: ; %bb.0:
|
||||||
; GCN-NEXT: s_not_b64 s[0:1], s[4:5]
|
; GCN-NEXT: s_andn2_b64 s[0:1], s[2:3], s[4:5]
|
||||||
; GCN-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
|
|
||||||
; GCN-NEXT: ; return to shader part epilog
|
; GCN-NEXT: ; return to shader part epilog
|
||||||
%not.src1 = xor i64 %src1, -1
|
%not.src1 = xor i64 %src1, -1
|
||||||
%and = and i64 %not.src1, %src0
|
%and = and i64 %not.src1, %src0
|
||||||
@@ -113,9 +108,8 @@ define amdgpu_ps i64 @s_andn2_i64_commute(i64 inreg %src0, i64 inreg %src1) {
|
|||||||
define amdgpu_ps { i64, i64 } @s_andn2_i64_multi_foldable_use(i64 inreg %src0, i64 inreg %src1, i64 inreg %src2) {
|
define amdgpu_ps { i64, i64 } @s_andn2_i64_multi_foldable_use(i64 inreg %src0, i64 inreg %src1, i64 inreg %src2) {
|
||||||
; GCN-LABEL: s_andn2_i64_multi_foldable_use:
|
; GCN-LABEL: s_andn2_i64_multi_foldable_use:
|
||||||
; GCN: ; %bb.0:
|
; GCN: ; %bb.0:
|
||||||
; GCN-NEXT: s_not_b64 s[6:7], s[6:7]
|
; GCN-NEXT: s_andn2_b64 s[0:1], s[2:3], s[6:7]
|
||||||
; GCN-NEXT: s_and_b64 s[0:1], s[2:3], s[6:7]
|
; GCN-NEXT: s_andn2_b64 s[2:3], s[4:5], s[6:7]
|
||||||
; GCN-NEXT: s_and_b64 s[2:3], s[4:5], s[6:7]
|
|
||||||
; GCN-NEXT: ; return to shader part epilog
|
; GCN-NEXT: ; return to shader part epilog
|
||||||
%not.src2 = xor i64 %src2, -1
|
%not.src2 = xor i64 %src2, -1
|
||||||
%and0 = and i64 %src0, %not.src2
|
%and0 = and i64 %src0, %not.src2
|
||||||
@@ -128,10 +122,10 @@ define amdgpu_ps { i64, i64 } @s_andn2_i64_multi_foldable_use(i64 inreg %src0, i
|
|||||||
define amdgpu_ps { i64, i64 } @s_andn2_i64_multi_use(i64 inreg %src0, i64 inreg %src1) {
|
define amdgpu_ps { i64, i64 } @s_andn2_i64_multi_use(i64 inreg %src0, i64 inreg %src1) {
|
||||||
; GCN-LABEL: s_andn2_i64_multi_use:
|
; GCN-LABEL: s_andn2_i64_multi_use:
|
||||||
; GCN: ; %bb.0:
|
; GCN: ; %bb.0:
|
||||||
; GCN-NEXT: s_not_b64 s[4:5], s[4:5]
|
; GCN-NEXT: s_not_b64 s[6:7], s[4:5]
|
||||||
; GCN-NEXT: s_and_b64 s[0:1], s[2:3], s[4:5]
|
; GCN-NEXT: s_andn2_b64 s[0:1], s[2:3], s[4:5]
|
||||||
; GCN-NEXT: s_mov_b32 s2, s4
|
; GCN-NEXT: s_mov_b32 s2, s6
|
||||||
; GCN-NEXT: s_mov_b32 s3, s5
|
; GCN-NEXT: s_mov_b32 s3, s7
|
||||||
; GCN-NEXT: ; return to shader part epilog
|
; GCN-NEXT: ; return to shader part epilog
|
||||||
%not.src1 = xor i64 %src1, -1
|
%not.src1 = xor i64 %src1, -1
|
||||||
%and = and i64 %src0, %not.src1
|
%and = and i64 %src0, %not.src1
|
||||||
|
@@ -5,8 +5,7 @@
|
|||||||
define amdgpu_ps i32 @s_orn2_i32(i32 inreg %src0, i32 inreg %src1) {
|
define amdgpu_ps i32 @s_orn2_i32(i32 inreg %src0, i32 inreg %src1) {
|
||||||
; GCN-LABEL: s_orn2_i32:
|
; GCN-LABEL: s_orn2_i32:
|
||||||
; GCN: ; %bb.0:
|
; GCN: ; %bb.0:
|
||||||
; GCN-NEXT: s_not_b32 s0, s3
|
; GCN-NEXT: s_orn2_b32 s0, s2, s3
|
||||||
; GCN-NEXT: s_or_b32 s0, s2, s0
|
|
||||||
; GCN-NEXT: ; return to shader part epilog
|
; GCN-NEXT: ; return to shader part epilog
|
||||||
%not.src1 = xor i32 %src1, -1
|
%not.src1 = xor i32 %src1, -1
|
||||||
%or = or i32 %src0, %not.src1
|
%or = or i32 %src0, %not.src1
|
||||||
@@ -16,8 +15,7 @@ define amdgpu_ps i32 @s_orn2_i32(i32 inreg %src0, i32 inreg %src1) {
|
|||||||
define amdgpu_ps i32 @s_orn2_i32_commute(i32 inreg %src0, i32 inreg %src1) {
|
define amdgpu_ps i32 @s_orn2_i32_commute(i32 inreg %src0, i32 inreg %src1) {
|
||||||
; GCN-LABEL: s_orn2_i32_commute:
|
; GCN-LABEL: s_orn2_i32_commute:
|
||||||
; GCN: ; %bb.0:
|
; GCN: ; %bb.0:
|
||||||
; GCN-NEXT: s_not_b32 s0, s3
|
; GCN-NEXT: s_orn2_b32 s0, s2, s3
|
||||||
; GCN-NEXT: s_or_b32 s0, s0, s2
|
|
||||||
; GCN-NEXT: ; return to shader part epilog
|
; GCN-NEXT: ; return to shader part epilog
|
||||||
%not.src1 = xor i32 %src1, -1
|
%not.src1 = xor i32 %src1, -1
|
||||||
%or = or i32 %not.src1, %src0
|
%or = or i32 %not.src1, %src0
|
||||||
@@ -28,7 +26,7 @@ define amdgpu_ps { i32, i32 } @s_orn2_i32_multi_use(i32 inreg %src0, i32 inreg %
|
|||||||
; GCN-LABEL: s_orn2_i32_multi_use:
|
; GCN-LABEL: s_orn2_i32_multi_use:
|
||||||
; GCN: ; %bb.0:
|
; GCN: ; %bb.0:
|
||||||
; GCN-NEXT: s_not_b32 s1, s3
|
; GCN-NEXT: s_not_b32 s1, s3
|
||||||
; GCN-NEXT: s_or_b32 s0, s2, s1
|
; GCN-NEXT: s_orn2_b32 s0, s2, s3
|
||||||
; GCN-NEXT: ; return to shader part epilog
|
; GCN-NEXT: ; return to shader part epilog
|
||||||
%not.src1 = xor i32 %src1, -1
|
%not.src1 = xor i32 %src1, -1
|
||||||
%or = or i32 %src0, %not.src1
|
%or = or i32 %src0, %not.src1
|
||||||
@@ -40,9 +38,8 @@ define amdgpu_ps { i32, i32 } @s_orn2_i32_multi_use(i32 inreg %src0, i32 inreg %
|
|||||||
define amdgpu_ps { i32, i32 } @s_orn2_i32_multi_foldable_use(i32 inreg %src0, i32 inreg %src1, i32 inreg %src2) {
|
define amdgpu_ps { i32, i32 } @s_orn2_i32_multi_foldable_use(i32 inreg %src0, i32 inreg %src1, i32 inreg %src2) {
|
||||||
; GCN-LABEL: s_orn2_i32_multi_foldable_use:
|
; GCN-LABEL: s_orn2_i32_multi_foldable_use:
|
||||||
; GCN: ; %bb.0:
|
; GCN: ; %bb.0:
|
||||||
; GCN-NEXT: s_not_b32 s1, s4
|
; GCN-NEXT: s_orn2_b32 s0, s2, s4
|
||||||
; GCN-NEXT: s_or_b32 s0, s2, s1
|
; GCN-NEXT: s_orn2_b32 s1, s3, s4
|
||||||
; GCN-NEXT: s_or_b32 s1, s3, s1
|
|
||||||
; GCN-NEXT: ; return to shader part epilog
|
; GCN-NEXT: ; return to shader part epilog
|
||||||
%not.src2 = xor i32 %src2, -1
|
%not.src2 = xor i32 %src2, -1
|
||||||
%or0 = or i32 %src0, %not.src2
|
%or0 = or i32 %src0, %not.src2
|
||||||
@@ -91,8 +88,7 @@ define amdgpu_ps float @v_orn2_i32_vs(i32 %src0, i32 inreg %src1) {
|
|||||||
define amdgpu_ps i64 @s_orn2_i64(i64 inreg %src0, i64 inreg %src1) {
|
define amdgpu_ps i64 @s_orn2_i64(i64 inreg %src0, i64 inreg %src1) {
|
||||||
; GCN-LABEL: s_orn2_i64:
|
; GCN-LABEL: s_orn2_i64:
|
||||||
; GCN: ; %bb.0:
|
; GCN: ; %bb.0:
|
||||||
; GCN-NEXT: s_not_b64 s[0:1], s[4:5]
|
; GCN-NEXT: s_orn2_b64 s[0:1], s[2:3], s[4:5]
|
||||||
; GCN-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
|
|
||||||
; GCN-NEXT: ; return to shader part epilog
|
; GCN-NEXT: ; return to shader part epilog
|
||||||
%not.src1 = xor i64 %src1, -1
|
%not.src1 = xor i64 %src1, -1
|
||||||
%or = or i64 %src0, %not.src1
|
%or = or i64 %src0, %not.src1
|
||||||
@@ -102,8 +98,7 @@ define amdgpu_ps i64 @s_orn2_i64(i64 inreg %src0, i64 inreg %src1) {
|
|||||||
define amdgpu_ps i64 @s_orn2_i64_commute(i64 inreg %src0, i64 inreg %src1) {
|
define amdgpu_ps i64 @s_orn2_i64_commute(i64 inreg %src0, i64 inreg %src1) {
|
||||||
; GCN-LABEL: s_orn2_i64_commute:
|
; GCN-LABEL: s_orn2_i64_commute:
|
||||||
; GCN: ; %bb.0:
|
; GCN: ; %bb.0:
|
||||||
; GCN-NEXT: s_not_b64 s[0:1], s[4:5]
|
; GCN-NEXT: s_orn2_b64 s[0:1], s[2:3], s[4:5]
|
||||||
; GCN-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
|
|
||||||
; GCN-NEXT: ; return to shader part epilog
|
; GCN-NEXT: ; return to shader part epilog
|
||||||
%not.src1 = xor i64 %src1, -1
|
%not.src1 = xor i64 %src1, -1
|
||||||
%or = or i64 %not.src1, %src0
|
%or = or i64 %not.src1, %src0
|
||||||
@@ -113,9 +108,8 @@ define amdgpu_ps i64 @s_orn2_i64_commute(i64 inreg %src0, i64 inreg %src1) {
|
|||||||
define amdgpu_ps { i64, i64 } @s_orn2_i64_multi_foldable_use(i64 inreg %src0, i64 inreg %src1, i64 inreg %src2) {
|
define amdgpu_ps { i64, i64 } @s_orn2_i64_multi_foldable_use(i64 inreg %src0, i64 inreg %src1, i64 inreg %src2) {
|
||||||
; GCN-LABEL: s_orn2_i64_multi_foldable_use:
|
; GCN-LABEL: s_orn2_i64_multi_foldable_use:
|
||||||
; GCN: ; %bb.0:
|
; GCN: ; %bb.0:
|
||||||
; GCN-NEXT: s_not_b64 s[6:7], s[6:7]
|
; GCN-NEXT: s_orn2_b64 s[0:1], s[2:3], s[6:7]
|
||||||
; GCN-NEXT: s_or_b64 s[0:1], s[2:3], s[6:7]
|
; GCN-NEXT: s_orn2_b64 s[2:3], s[4:5], s[6:7]
|
||||||
; GCN-NEXT: s_or_b64 s[2:3], s[4:5], s[6:7]
|
|
||||||
; GCN-NEXT: ; return to shader part epilog
|
; GCN-NEXT: ; return to shader part epilog
|
||||||
%not.src2 = xor i64 %src2, -1
|
%not.src2 = xor i64 %src2, -1
|
||||||
%or0 = or i64 %src0, %not.src2
|
%or0 = or i64 %src0, %not.src2
|
||||||
@@ -128,10 +122,10 @@ define amdgpu_ps { i64, i64 } @s_orn2_i64_multi_foldable_use(i64 inreg %src0, i6
|
|||||||
define amdgpu_ps { i64, i64 } @s_orn2_i64_multi_use(i64 inreg %src0, i64 inreg %src1) {
|
define amdgpu_ps { i64, i64 } @s_orn2_i64_multi_use(i64 inreg %src0, i64 inreg %src1) {
|
||||||
; GCN-LABEL: s_orn2_i64_multi_use:
|
; GCN-LABEL: s_orn2_i64_multi_use:
|
||||||
; GCN: ; %bb.0:
|
; GCN: ; %bb.0:
|
||||||
; GCN-NEXT: s_not_b64 s[4:5], s[4:5]
|
; GCN-NEXT: s_not_b64 s[6:7], s[4:5]
|
||||||
; GCN-NEXT: s_or_b64 s[0:1], s[2:3], s[4:5]
|
; GCN-NEXT: s_orn2_b64 s[0:1], s[2:3], s[4:5]
|
||||||
; GCN-NEXT: s_mov_b32 s2, s4
|
; GCN-NEXT: s_mov_b32 s2, s6
|
||||||
; GCN-NEXT: s_mov_b32 s3, s5
|
; GCN-NEXT: s_mov_b32 s3, s7
|
||||||
; GCN-NEXT: ; return to shader part epilog
|
; GCN-NEXT: ; return to shader part epilog
|
||||||
%not.src1 = xor i64 %src1, -1
|
%not.src1 = xor i64 %src1, -1
|
||||||
%or = or i64 %src0, %not.src1
|
%or = or i64 %src0, %not.src1
|
||||||
|
Loading…
Reference in New Issue
Block a user