1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 12:41:49 +01:00

[ARM] Handle any extend whilst lowering addw/addl/subw/subl

Same as a9b6440edd: use zanyext to treat any_extends as zero extends
during lowering to create addw/addl/subw/subl nodes.

Differential Revision: https://reviews.llvm.org/D93835
This commit is contained in:
David Green 2021-01-06 11:26:39 +00:00
parent ff9300b791
commit 6e066a74ee
3 changed files with 32 additions and 50 deletions

View File

@@ -4197,10 +4197,10 @@ def VADDhq : N3VQ<0, 0, 0b01, 0b1101, 0, IIC_VBINQ, "vadd", "f16",
defm VADDLs : N3VLExt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
"vaddl", "s", add, sext, 1>;
defm VADDLu : N3VLExt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
"vaddl", "u", add, zext, 1>;
"vaddl", "u", add, zanyext, 1>;
// VADDW : Vector Add Wide (Q = Q + D)
defm VADDWs : N3VW_QHS<0,1,0b0001,0, "vaddw", "s", add, sext, 0>;
defm VADDWu : N3VW_QHS<1,1,0b0001,0, "vaddw", "u", add, zext, 0>;
defm VADDWu : N3VW_QHS<1,1,0b0001,0, "vaddw", "u", add, zanyext, 0>;
// VHADD : Vector Halving Add
defm VHADDs : N3VInt_QHS<0, 0, 0b0000, 0, N3RegFrm,
IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
@@ -5045,10 +5045,10 @@ def VSUBhq : N3VQ<0, 0, 0b11, 0b1101, 0, IIC_VBINQ, "vsub", "f16",
defm VSUBLs : N3VLExt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
"vsubl", "s", sub, sext, 0>;
defm VSUBLu : N3VLExt_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
"vsubl", "u", sub, zext, 0>;
"vsubl", "u", sub, zanyext, 0>;
// VSUBW : Vector Subtract Wide (Q = Q - D)
defm VSUBWs : N3VW_QHS<0,1,0b0011,0, "vsubw", "s", sub, sext, 0>;
defm VSUBWu : N3VW_QHS<1,1,0b0011,0, "vsubw", "u", sub, zext, 0>;
defm VSUBWu : N3VW_QHS<1,1,0b0011,0, "vsubw", "u", sub, zanyext, 0>;
// VHSUB : Vector Halving Subtract
defm VHSUBs : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm,
IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,

View File

@@ -224,9 +224,7 @@ define <2 x i64> @vaddlu32(<2 x i32> %A, <2 x i32> %B) {
define <8 x i16> @vaddla8(<8 x i8> %A, <8 x i8> %B) {
; CHECK-LABEL: vaddla8:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmovl.u8 q8, d1
; CHECK-NEXT: vmovl.u8 q9, d0
; CHECK-NEXT: vadd.i16 q0, q9, q8
; CHECK-NEXT: vaddl.u8 q0, d0, d1
; CHECK-NEXT: vbic.i16 q0, #0xff00
; CHECK-NEXT: bx lr
%tmp3 = zext <8 x i8> %A to <8 x i16>
@@ -239,11 +237,9 @@ define <8 x i16> @vaddla8(<8 x i8> %A, <8 x i8> %B) {
define <4 x i32> @vaddla16(<4 x i16> %A, <4 x i16> %B) {
; CHECK-LABEL: vaddla16:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmovl.u16 q8, d1
; CHECK-NEXT: vmovl.u16 q9, d0
; CHECK-NEXT: vmov.i32 q10, #0xffff
; CHECK-NEXT: vadd.i32 q8, q9, q8
; CHECK-NEXT: vand q0, q8, q10
; CHECK-NEXT: vmov.i32 q8, #0xffff
; CHECK-NEXT: vaddl.u16 q9, d0, d1
; CHECK-NEXT: vand q0, q9, q8
; CHECK-NEXT: bx lr
%tmp3 = zext <4 x i16> %A to <4 x i32>
%tmp4 = zext <4 x i16> %B to <4 x i32>
@@ -255,11 +251,9 @@ define <4 x i32> @vaddla16(<4 x i16> %A, <4 x i16> %B) {
define <2 x i64> @vaddla32(<2 x i32> %A, <2 x i32> %B) {
; CHECK-LABEL: vaddla32:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmovl.u32 q8, d1
; CHECK-NEXT: vmovl.u32 q9, d0
; CHECK-NEXT: vmov.i64 q10, #0xffffffff
; CHECK-NEXT: vadd.i64 q8, q9, q8
; CHECK-NEXT: vand q0, q8, q10
; CHECK-NEXT: vmov.i64 q8, #0xffffffff
; CHECK-NEXT: vaddl.u32 q9, d0, d1
; CHECK-NEXT: vand q0, q9, q8
; CHECK-NEXT: bx lr
%tmp3 = zext <2 x i32> %A to <2 x i64>
%tmp4 = zext <2 x i32> %B to <2 x i64>
@@ -331,8 +325,7 @@ define <2 x i64> @vaddwu32(<2 x i64> %A, <2 x i32> %B) {
define <8 x i16> @vaddwa8(<8 x i16> %A, <8 x i8> %B) {
; CHECK-LABEL: vaddwa8:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmovl.u8 q8, d2
; CHECK-NEXT: vadd.i16 q0, q0, q8
; CHECK-NEXT: vaddw.u8 q0, q0, d2
; CHECK-NEXT: vbic.i16 q0, #0xff00
; CHECK-NEXT: bx lr
%tmp3 = zext <8 x i8> %B to <8 x i16>
@@ -344,10 +337,9 @@ define <8 x i16> @vaddwa8(<8 x i16> %A, <8 x i8> %B) {
define <4 x i32> @vaddwa16(<4 x i32> %A, <4 x i16> %B) {
; CHECK-LABEL: vaddwa16:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmovl.u16 q8, d2
; CHECK-NEXT: vmov.i32 q9, #0xffff
; CHECK-NEXT: vadd.i32 q8, q0, q8
; CHECK-NEXT: vand q0, q8, q9
; CHECK-NEXT: vmov.i32 q8, #0xffff
; CHECK-NEXT: vaddw.u16 q9, q0, d2
; CHECK-NEXT: vand q0, q9, q8
; CHECK-NEXT: bx lr
%tmp3 = zext <4 x i16> %B to <4 x i32>
%tmp4 = add <4 x i32> %A, %tmp3
@@ -358,10 +350,9 @@ define <4 x i32> @vaddwa16(<4 x i32> %A, <4 x i16> %B) {
define <2 x i64> @vaddwa32(<2 x i64> %A, <2 x i32> %B) {
; CHECK-LABEL: vaddwa32:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmovl.u32 q8, d2
; CHECK-NEXT: vmov.i64 q9, #0xffffffff
; CHECK-NEXT: vadd.i64 q8, q0, q8
; CHECK-NEXT: vand q0, q8, q9
; CHECK-NEXT: vmov.i64 q8, #0xffffffff
; CHECK-NEXT: vaddw.u32 q9, q0, d2
; CHECK-NEXT: vand q0, q9, q8
; CHECK-NEXT: bx lr
%tmp3 = zext <2 x i32> %B to <2 x i64>
%tmp4 = add <2 x i64> %A, %tmp3

View File

@@ -224,9 +224,7 @@ define <2 x i64> @vsublu32(<2 x i32> %A, <2 x i32> %B) {
define <8 x i16> @vsubla8(<8 x i8> %A, <8 x i8> %B) {
; CHECK-LABEL: vsubla8:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmovl.u8 q8, d1
; CHECK-NEXT: vmovl.u8 q9, d0
; CHECK-NEXT: vsub.i16 q0, q9, q8
; CHECK-NEXT: vsubl.u8 q0, d0, d1
; CHECK-NEXT: vbic.i16 q0, #0xff00
; CHECK-NEXT: bx lr
%tmp3 = zext <8 x i8> %A to <8 x i16>
@@ -239,11 +237,9 @@ define <8 x i16> @vsubla8(<8 x i8> %A, <8 x i8> %B) {
define <4 x i32> @vsubla16(<4 x i16> %A, <4 x i16> %B) {
; CHECK-LABEL: vsubla16:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmovl.u16 q8, d1
; CHECK-NEXT: vmovl.u16 q9, d0
; CHECK-NEXT: vmov.i32 q10, #0xffff
; CHECK-NEXT: vsub.i32 q8, q9, q8
; CHECK-NEXT: vand q0, q8, q10
; CHECK-NEXT: vmov.i32 q8, #0xffff
; CHECK-NEXT: vsubl.u16 q9, d0, d1
; CHECK-NEXT: vand q0, q9, q8
; CHECK-NEXT: bx lr
%tmp3 = zext <4 x i16> %A to <4 x i32>
%tmp4 = zext <4 x i16> %B to <4 x i32>
@@ -255,11 +251,9 @@ define <4 x i32> @vsubla16(<4 x i16> %A, <4 x i16> %B) {
define <2 x i64> @vsubla32(<2 x i32> %A, <2 x i32> %B) {
; CHECK-LABEL: vsubla32:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmovl.u32 q8, d1
; CHECK-NEXT: vmovl.u32 q9, d0
; CHECK-NEXT: vmov.i64 q10, #0xffffffff
; CHECK-NEXT: vsub.i64 q8, q9, q8
; CHECK-NEXT: vand q0, q8, q10
; CHECK-NEXT: vmov.i64 q8, #0xffffffff
; CHECK-NEXT: vsubl.u32 q9, d0, d1
; CHECK-NEXT: vand q0, q9, q8
; CHECK-NEXT: bx lr
%tmp3 = zext <2 x i32> %A to <2 x i64>
%tmp4 = zext <2 x i32> %B to <2 x i64>
@@ -331,8 +325,7 @@ define <2 x i64> @vsubwu32(<2 x i64> %A, <2 x i32> %B) {
define <8 x i16> @vsubwa8(<8 x i16> %A, <8 x i8> %B) {
; CHECK-LABEL: vsubwa8:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmovl.u8 q8, d2
; CHECK-NEXT: vsub.i16 q0, q0, q8
; CHECK-NEXT: vsubw.u8 q0, q0, d2
; CHECK-NEXT: vbic.i16 q0, #0xff00
; CHECK-NEXT: bx lr
%tmp3 = zext <8 x i8> %B to <8 x i16>
@@ -344,10 +337,9 @@ define <8 x i16> @vsubwa8(<8 x i16> %A, <8 x i8> %B) {
define <4 x i32> @vsubwa16(<4 x i32> %A, <4 x i16> %B) {
; CHECK-LABEL: vsubwa16:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmovl.u16 q8, d2
; CHECK-NEXT: vmov.i32 q9, #0xffff
; CHECK-NEXT: vsub.i32 q8, q0, q8
; CHECK-NEXT: vand q0, q8, q9
; CHECK-NEXT: vmov.i32 q8, #0xffff
; CHECK-NEXT: vsubw.u16 q9, q0, d2
; CHECK-NEXT: vand q0, q9, q8
; CHECK-NEXT: bx lr
%tmp3 = zext <4 x i16> %B to <4 x i32>
%tmp4 = sub <4 x i32> %A, %tmp3
@@ -358,10 +350,9 @@ define <4 x i32> @vsubwa16(<4 x i32> %A, <4 x i16> %B) {
define <2 x i64> @vsubwa32(<2 x i64> %A, <2 x i32> %B) {
; CHECK-LABEL: vsubwa32:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmovl.u32 q8, d2
; CHECK-NEXT: vmov.i64 q9, #0xffffffff
; CHECK-NEXT: vsub.i64 q8, q0, q8
; CHECK-NEXT: vand q0, q8, q9
; CHECK-NEXT: vmov.i64 q8, #0xffffffff
; CHECK-NEXT: vsubw.u32 q9, q0, d2
; CHECK-NEXT: vand q0, q9, q8
; CHECK-NEXT: bx lr
%tmp3 = zext <2 x i32> %B to <2 x i64>
%tmp4 = sub <2 x i64> %A, %tmp3