1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-26 04:32:44 +01:00

[RISCV] Use SLLI/SRLI instead of SLLIW/SRLIW for (srl (and X, 0xffff), C) custom isel on RV64.

We don't need the sign-extending behavior here, and SLLI/SRLI
can be compressed to C.SLLI/C.SRLI.
This commit is contained in:
Craig Topper 2021-04-11 11:57:52 -07:00
parent 74d4501854
commit f05f5de866
6 changed files with 113 additions and 113 deletions

View File

@ -423,7 +423,8 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
// We don't need this transform if zext.h is supported.
if (Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbp())
break;
// Optimize (srl (and X, 0xffff), C) -> (srli (slli X, 16), 16 + C).
// Optimize (srl (and X, 0xffff), C) ->
// (srli (slli X, (XLen-16)), (XLen-16) + C)
// Taking into account that the 0xffff may have had lower bits unset by
// SimplifyDemandedBits. This avoids materializing the 0xffff immediate.
// This pattern occurs when type legalizing i16 right shifts.
@ -437,14 +438,13 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
uint64_t Mask = N0.getConstantOperandVal(1);
Mask |= maskTrailingOnes<uint64_t>(ShAmt);
if (Mask == 0xffff) {
unsigned SLLOpc = Subtarget->is64Bit() ? RISCV::SLLIW : RISCV::SLLI;
unsigned SRLOpc = Subtarget->is64Bit() ? RISCV::SRLIW : RISCV::SRLI;
unsigned LShAmt = Subtarget->getXLen() - 16;
SDNode *SLLI =
CurDAG->getMachineNode(SLLOpc, DL, VT, N0->getOperand(0),
CurDAG->getTargetConstant(16, DL, VT));
CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
CurDAG->getTargetConstant(LShAmt, DL, VT));
SDNode *SRLI = CurDAG->getMachineNode(
SRLOpc, DL, VT, SDValue(SLLI, 0),
CurDAG->getTargetConstant(16 + ShAmt, DL, VT));
RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
ReplaceNode(Node, SRLI);
return;
}

View File

@ -127,8 +127,8 @@ define i16 @srli(i16 %a) nounwind {
;
; RV64I-LABEL: srli:
; RV64I: # %bb.0:
; RV64I-NEXT: slliw a0, a0, 16
; RV64I-NEXT: srliw a0, a0, 22
; RV64I-NEXT: slli a0, a0, 48
; RV64I-NEXT: srli a0, a0, 54
; RV64I-NEXT: ret
%1 = lshr i16 %a, 6
ret i16 %1

View File

@ -26,8 +26,8 @@ define i16 @test_bswap_i16(i16 %a) nounwind {
; RV64I-LABEL: test_bswap_i16:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a1, a0, 8
; RV64I-NEXT: slliw a0, a0, 16
; RV64I-NEXT: srliw a0, a0, 24
; RV64I-NEXT: slli a0, a0, 48
; RV64I-NEXT: srli a0, a0, 56
; RV64I-NEXT: or a0, a1, a0
; RV64I-NEXT: ret
%tmp = call i16 @llvm.bswap.i16(i16 %a)

View File

@ -425,14 +425,14 @@ define i16 @udiv16_pow2(i16 %a) nounwind {
;
; RV64I-LABEL: udiv16_pow2:
; RV64I: # %bb.0:
; RV64I-NEXT: slliw a0, a0, 16
; RV64I-NEXT: srliw a0, a0, 19
; RV64I-NEXT: slli a0, a0, 48
; RV64I-NEXT: srli a0, a0, 51
; RV64I-NEXT: ret
;
; RV64IM-LABEL: udiv16_pow2:
; RV64IM: # %bb.0:
; RV64IM-NEXT: slliw a0, a0, 16
; RV64IM-NEXT: srliw a0, a0, 19
; RV64IM-NEXT: slli a0, a0, 48
; RV64IM-NEXT: srli a0, a0, 51
; RV64IM-NEXT: ret
%1 = udiv i16 %a, 8
ret i16 %1

View File

@ -669,8 +669,8 @@ define i8 @srai_i8(i8 %a) nounwind {
define i16 @srli_i16(i16 %a) nounwind {
; RV64I-LABEL: srli_i16:
; RV64I: # %bb.0:
; RV64I-NEXT: slliw a0, a0, 16
; RV64I-NEXT: srliw a0, a0, 22
; RV64I-NEXT: slli a0, a0, 48
; RV64I-NEXT: srli a0, a0, 54
; RV64I-NEXT: ret
;
; RV64IB-LABEL: srli_i16:

View File

@ -81,58 +81,58 @@ define void @bswap_v8i16(<8 x i16>* %x, <8 x i16>* %y) {
; LMULMAX2-RV64-NEXT: vle16.v v25, (a0)
; LMULMAX2-RV64-NEXT: vmv.x.s a1, v25
; LMULMAX2-RV64-NEXT: slli a2, a1, 8
; LMULMAX2-RV64-NEXT: slliw a1, a1, 16
; LMULMAX2-RV64-NEXT: srliw a1, a1, 24
; LMULMAX2-RV64-NEXT: slli a1, a1, 48
; LMULMAX2-RV64-NEXT: srli a1, a1, 56
; LMULMAX2-RV64-NEXT: or a1, a2, a1
; LMULMAX2-RV64-NEXT: sh a1, 0(sp)
; LMULMAX2-RV64-NEXT: vsetivli a1, 1, e16,m1,ta,mu
; LMULMAX2-RV64-NEXT: vslidedown.vi v26, v25, 7
; LMULMAX2-RV64-NEXT: vmv.x.s a1, v26
; LMULMAX2-RV64-NEXT: slli a2, a1, 8
; LMULMAX2-RV64-NEXT: slliw a1, a1, 16
; LMULMAX2-RV64-NEXT: srliw a1, a1, 24
; LMULMAX2-RV64-NEXT: slli a1, a1, 48
; LMULMAX2-RV64-NEXT: srli a1, a1, 56
; LMULMAX2-RV64-NEXT: or a1, a2, a1
; LMULMAX2-RV64-NEXT: sh a1, 14(sp)
; LMULMAX2-RV64-NEXT: vslidedown.vi v26, v25, 6
; LMULMAX2-RV64-NEXT: vmv.x.s a1, v26
; LMULMAX2-RV64-NEXT: slli a2, a1, 8
; LMULMAX2-RV64-NEXT: slliw a1, a1, 16
; LMULMAX2-RV64-NEXT: srliw a1, a1, 24
; LMULMAX2-RV64-NEXT: slli a1, a1, 48
; LMULMAX2-RV64-NEXT: srli a1, a1, 56
; LMULMAX2-RV64-NEXT: or a1, a2, a1
; LMULMAX2-RV64-NEXT: sh a1, 12(sp)
; LMULMAX2-RV64-NEXT: vslidedown.vi v26, v25, 5
; LMULMAX2-RV64-NEXT: vmv.x.s a1, v26
; LMULMAX2-RV64-NEXT: slli a2, a1, 8
; LMULMAX2-RV64-NEXT: slliw a1, a1, 16
; LMULMAX2-RV64-NEXT: srliw a1, a1, 24
; LMULMAX2-RV64-NEXT: slli a1, a1, 48
; LMULMAX2-RV64-NEXT: srli a1, a1, 56
; LMULMAX2-RV64-NEXT: or a1, a2, a1
; LMULMAX2-RV64-NEXT: sh a1, 10(sp)
; LMULMAX2-RV64-NEXT: vslidedown.vi v26, v25, 4
; LMULMAX2-RV64-NEXT: vmv.x.s a1, v26
; LMULMAX2-RV64-NEXT: slli a2, a1, 8
; LMULMAX2-RV64-NEXT: slliw a1, a1, 16
; LMULMAX2-RV64-NEXT: srliw a1, a1, 24
; LMULMAX2-RV64-NEXT: slli a1, a1, 48
; LMULMAX2-RV64-NEXT: srli a1, a1, 56
; LMULMAX2-RV64-NEXT: or a1, a2, a1
; LMULMAX2-RV64-NEXT: sh a1, 8(sp)
; LMULMAX2-RV64-NEXT: vslidedown.vi v26, v25, 3
; LMULMAX2-RV64-NEXT: vmv.x.s a1, v26
; LMULMAX2-RV64-NEXT: slli a2, a1, 8
; LMULMAX2-RV64-NEXT: slliw a1, a1, 16
; LMULMAX2-RV64-NEXT: srliw a1, a1, 24
; LMULMAX2-RV64-NEXT: slli a1, a1, 48
; LMULMAX2-RV64-NEXT: srli a1, a1, 56
; LMULMAX2-RV64-NEXT: or a1, a2, a1
; LMULMAX2-RV64-NEXT: sh a1, 6(sp)
; LMULMAX2-RV64-NEXT: vslidedown.vi v26, v25, 2
; LMULMAX2-RV64-NEXT: vmv.x.s a1, v26
; LMULMAX2-RV64-NEXT: slli a2, a1, 8
; LMULMAX2-RV64-NEXT: slliw a1, a1, 16
; LMULMAX2-RV64-NEXT: srliw a1, a1, 24
; LMULMAX2-RV64-NEXT: slli a1, a1, 48
; LMULMAX2-RV64-NEXT: srli a1, a1, 56
; LMULMAX2-RV64-NEXT: or a1, a2, a1
; LMULMAX2-RV64-NEXT: sh a1, 4(sp)
; LMULMAX2-RV64-NEXT: vslidedown.vi v25, v25, 1
; LMULMAX2-RV64-NEXT: vmv.x.s a1, v25
; LMULMAX2-RV64-NEXT: slli a2, a1, 8
; LMULMAX2-RV64-NEXT: slliw a1, a1, 16
; LMULMAX2-RV64-NEXT: srliw a1, a1, 24
; LMULMAX2-RV64-NEXT: slli a1, a1, 48
; LMULMAX2-RV64-NEXT: srli a1, a1, 56
; LMULMAX2-RV64-NEXT: or a1, a2, a1
; LMULMAX2-RV64-NEXT: sh a1, 2(sp)
; LMULMAX2-RV64-NEXT: vsetivli a1, 8, e16,m1,ta,mu
@ -217,58 +217,58 @@ define void @bswap_v8i16(<8 x i16>* %x, <8 x i16>* %y) {
; LMULMAX1-RV64-NEXT: vle16.v v25, (a0)
; LMULMAX1-RV64-NEXT: vmv.x.s a1, v25
; LMULMAX1-RV64-NEXT: slli a2, a1, 8
; LMULMAX1-RV64-NEXT: slliw a1, a1, 16
; LMULMAX1-RV64-NEXT: srliw a1, a1, 24
; LMULMAX1-RV64-NEXT: slli a1, a1, 48
; LMULMAX1-RV64-NEXT: srli a1, a1, 56
; LMULMAX1-RV64-NEXT: or a1, a2, a1
; LMULMAX1-RV64-NEXT: sh a1, 0(sp)
; LMULMAX1-RV64-NEXT: vsetivli a1, 1, e16,m1,ta,mu
; LMULMAX1-RV64-NEXT: vslidedown.vi v26, v25, 7
; LMULMAX1-RV64-NEXT: vmv.x.s a1, v26
; LMULMAX1-RV64-NEXT: slli a2, a1, 8
; LMULMAX1-RV64-NEXT: slliw a1, a1, 16
; LMULMAX1-RV64-NEXT: srliw a1, a1, 24
; LMULMAX1-RV64-NEXT: slli a1, a1, 48
; LMULMAX1-RV64-NEXT: srli a1, a1, 56
; LMULMAX1-RV64-NEXT: or a1, a2, a1
; LMULMAX1-RV64-NEXT: sh a1, 14(sp)
; LMULMAX1-RV64-NEXT: vslidedown.vi v26, v25, 6
; LMULMAX1-RV64-NEXT: vmv.x.s a1, v26
; LMULMAX1-RV64-NEXT: slli a2, a1, 8
; LMULMAX1-RV64-NEXT: slliw a1, a1, 16
; LMULMAX1-RV64-NEXT: srliw a1, a1, 24
; LMULMAX1-RV64-NEXT: slli a1, a1, 48
; LMULMAX1-RV64-NEXT: srli a1, a1, 56
; LMULMAX1-RV64-NEXT: or a1, a2, a1
; LMULMAX1-RV64-NEXT: sh a1, 12(sp)
; LMULMAX1-RV64-NEXT: vslidedown.vi v26, v25, 5
; LMULMAX1-RV64-NEXT: vmv.x.s a1, v26
; LMULMAX1-RV64-NEXT: slli a2, a1, 8
; LMULMAX1-RV64-NEXT: slliw a1, a1, 16
; LMULMAX1-RV64-NEXT: srliw a1, a1, 24
; LMULMAX1-RV64-NEXT: slli a1, a1, 48
; LMULMAX1-RV64-NEXT: srli a1, a1, 56
; LMULMAX1-RV64-NEXT: or a1, a2, a1
; LMULMAX1-RV64-NEXT: sh a1, 10(sp)
; LMULMAX1-RV64-NEXT: vslidedown.vi v26, v25, 4
; LMULMAX1-RV64-NEXT: vmv.x.s a1, v26
; LMULMAX1-RV64-NEXT: slli a2, a1, 8
; LMULMAX1-RV64-NEXT: slliw a1, a1, 16
; LMULMAX1-RV64-NEXT: srliw a1, a1, 24
; LMULMAX1-RV64-NEXT: slli a1, a1, 48
; LMULMAX1-RV64-NEXT: srli a1, a1, 56
; LMULMAX1-RV64-NEXT: or a1, a2, a1
; LMULMAX1-RV64-NEXT: sh a1, 8(sp)
; LMULMAX1-RV64-NEXT: vslidedown.vi v26, v25, 3
; LMULMAX1-RV64-NEXT: vmv.x.s a1, v26
; LMULMAX1-RV64-NEXT: slli a2, a1, 8
; LMULMAX1-RV64-NEXT: slliw a1, a1, 16
; LMULMAX1-RV64-NEXT: srliw a1, a1, 24
; LMULMAX1-RV64-NEXT: slli a1, a1, 48
; LMULMAX1-RV64-NEXT: srli a1, a1, 56
; LMULMAX1-RV64-NEXT: or a1, a2, a1
; LMULMAX1-RV64-NEXT: sh a1, 6(sp)
; LMULMAX1-RV64-NEXT: vslidedown.vi v26, v25, 2
; LMULMAX1-RV64-NEXT: vmv.x.s a1, v26
; LMULMAX1-RV64-NEXT: slli a2, a1, 8
; LMULMAX1-RV64-NEXT: slliw a1, a1, 16
; LMULMAX1-RV64-NEXT: srliw a1, a1, 24
; LMULMAX1-RV64-NEXT: slli a1, a1, 48
; LMULMAX1-RV64-NEXT: srli a1, a1, 56
; LMULMAX1-RV64-NEXT: or a1, a2, a1
; LMULMAX1-RV64-NEXT: sh a1, 4(sp)
; LMULMAX1-RV64-NEXT: vslidedown.vi v25, v25, 1
; LMULMAX1-RV64-NEXT: vmv.x.s a1, v25
; LMULMAX1-RV64-NEXT: slli a2, a1, 8
; LMULMAX1-RV64-NEXT: slliw a1, a1, 16
; LMULMAX1-RV64-NEXT: srliw a1, a1, 24
; LMULMAX1-RV64-NEXT: slli a1, a1, 48
; LMULMAX1-RV64-NEXT: srli a1, a1, 56
; LMULMAX1-RV64-NEXT: or a1, a2, a1
; LMULMAX1-RV64-NEXT: sh a1, 2(sp)
; LMULMAX1-RV64-NEXT: vsetivli a1, 8, e16,m1,ta,mu
@ -960,114 +960,114 @@ define void @bswap_v16i16(<16 x i16>* %x, <16 x i16>* %y) {
; LMULMAX2-RV64-NEXT: vle16.v v26, (a0)
; LMULMAX2-RV64-NEXT: vmv.x.s a1, v26
; LMULMAX2-RV64-NEXT: slli a2, a1, 8
; LMULMAX2-RV64-NEXT: slliw a1, a1, 16
; LMULMAX2-RV64-NEXT: srliw a1, a1, 24
; LMULMAX2-RV64-NEXT: slli a1, a1, 48
; LMULMAX2-RV64-NEXT: srli a1, a1, 56
; LMULMAX2-RV64-NEXT: or a1, a2, a1
; LMULMAX2-RV64-NEXT: sh a1, 0(sp)
; LMULMAX2-RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu
; LMULMAX2-RV64-NEXT: vslidedown.vi v28, v26, 15
; LMULMAX2-RV64-NEXT: vmv.x.s a1, v28
; LMULMAX2-RV64-NEXT: slli a2, a1, 8
; LMULMAX2-RV64-NEXT: slliw a1, a1, 16
; LMULMAX2-RV64-NEXT: srliw a1, a1, 24
; LMULMAX2-RV64-NEXT: slli a1, a1, 48
; LMULMAX2-RV64-NEXT: srli a1, a1, 56
; LMULMAX2-RV64-NEXT: or a1, a2, a1
; LMULMAX2-RV64-NEXT: sh a1, 30(sp)
; LMULMAX2-RV64-NEXT: vslidedown.vi v28, v26, 14
; LMULMAX2-RV64-NEXT: vmv.x.s a1, v28
; LMULMAX2-RV64-NEXT: slli a2, a1, 8
; LMULMAX2-RV64-NEXT: slliw a1, a1, 16
; LMULMAX2-RV64-NEXT: srliw a1, a1, 24
; LMULMAX2-RV64-NEXT: slli a1, a1, 48
; LMULMAX2-RV64-NEXT: srli a1, a1, 56
; LMULMAX2-RV64-NEXT: or a1, a2, a1
; LMULMAX2-RV64-NEXT: sh a1, 28(sp)
; LMULMAX2-RV64-NEXT: vslidedown.vi v28, v26, 13
; LMULMAX2-RV64-NEXT: vmv.x.s a1, v28
; LMULMAX2-RV64-NEXT: slli a2, a1, 8
; LMULMAX2-RV64-NEXT: slliw a1, a1, 16
; LMULMAX2-RV64-NEXT: srliw a1, a1, 24
; LMULMAX2-RV64-NEXT: slli a1, a1, 48
; LMULMAX2-RV64-NEXT: srli a1, a1, 56
; LMULMAX2-RV64-NEXT: or a1, a2, a1
; LMULMAX2-RV64-NEXT: sh a1, 26(sp)
; LMULMAX2-RV64-NEXT: vslidedown.vi v28, v26, 12
; LMULMAX2-RV64-NEXT: vmv.x.s a1, v28
; LMULMAX2-RV64-NEXT: slli a2, a1, 8
; LMULMAX2-RV64-NEXT: slliw a1, a1, 16
; LMULMAX2-RV64-NEXT: srliw a1, a1, 24
; LMULMAX2-RV64-NEXT: slli a1, a1, 48
; LMULMAX2-RV64-NEXT: srli a1, a1, 56
; LMULMAX2-RV64-NEXT: or a1, a2, a1
; LMULMAX2-RV64-NEXT: sh a1, 24(sp)
; LMULMAX2-RV64-NEXT: vslidedown.vi v28, v26, 11
; LMULMAX2-RV64-NEXT: vmv.x.s a1, v28
; LMULMAX2-RV64-NEXT: slli a2, a1, 8
; LMULMAX2-RV64-NEXT: slliw a1, a1, 16
; LMULMAX2-RV64-NEXT: srliw a1, a1, 24
; LMULMAX2-RV64-NEXT: slli a1, a1, 48
; LMULMAX2-RV64-NEXT: srli a1, a1, 56
; LMULMAX2-RV64-NEXT: or a1, a2, a1
; LMULMAX2-RV64-NEXT: sh a1, 22(sp)
; LMULMAX2-RV64-NEXT: vslidedown.vi v28, v26, 10
; LMULMAX2-RV64-NEXT: vmv.x.s a1, v28
; LMULMAX2-RV64-NEXT: slli a2, a1, 8
; LMULMAX2-RV64-NEXT: slliw a1, a1, 16
; LMULMAX2-RV64-NEXT: srliw a1, a1, 24
; LMULMAX2-RV64-NEXT: slli a1, a1, 48
; LMULMAX2-RV64-NEXT: srli a1, a1, 56
; LMULMAX2-RV64-NEXT: or a1, a2, a1
; LMULMAX2-RV64-NEXT: sh a1, 20(sp)
; LMULMAX2-RV64-NEXT: vslidedown.vi v28, v26, 9
; LMULMAX2-RV64-NEXT: vmv.x.s a1, v28
; LMULMAX2-RV64-NEXT: slli a2, a1, 8
; LMULMAX2-RV64-NEXT: slliw a1, a1, 16
; LMULMAX2-RV64-NEXT: srliw a1, a1, 24
; LMULMAX2-RV64-NEXT: slli a1, a1, 48
; LMULMAX2-RV64-NEXT: srli a1, a1, 56
; LMULMAX2-RV64-NEXT: or a1, a2, a1
; LMULMAX2-RV64-NEXT: sh a1, 18(sp)
; LMULMAX2-RV64-NEXT: vslidedown.vi v28, v26, 8
; LMULMAX2-RV64-NEXT: vmv.x.s a1, v28
; LMULMAX2-RV64-NEXT: slli a2, a1, 8
; LMULMAX2-RV64-NEXT: slliw a1, a1, 16
; LMULMAX2-RV64-NEXT: srliw a1, a1, 24
; LMULMAX2-RV64-NEXT: slli a1, a1, 48
; LMULMAX2-RV64-NEXT: srli a1, a1, 56
; LMULMAX2-RV64-NEXT: or a1, a2, a1
; LMULMAX2-RV64-NEXT: sh a1, 16(sp)
; LMULMAX2-RV64-NEXT: vslidedown.vi v28, v26, 7
; LMULMAX2-RV64-NEXT: vmv.x.s a1, v28
; LMULMAX2-RV64-NEXT: slli a2, a1, 8
; LMULMAX2-RV64-NEXT: slliw a1, a1, 16
; LMULMAX2-RV64-NEXT: srliw a1, a1, 24
; LMULMAX2-RV64-NEXT: slli a1, a1, 48
; LMULMAX2-RV64-NEXT: srli a1, a1, 56
; LMULMAX2-RV64-NEXT: or a1, a2, a1
; LMULMAX2-RV64-NEXT: sh a1, 14(sp)
; LMULMAX2-RV64-NEXT: vslidedown.vi v28, v26, 6
; LMULMAX2-RV64-NEXT: vmv.x.s a1, v28
; LMULMAX2-RV64-NEXT: slli a2, a1, 8
; LMULMAX2-RV64-NEXT: slliw a1, a1, 16
; LMULMAX2-RV64-NEXT: srliw a1, a1, 24
; LMULMAX2-RV64-NEXT: slli a1, a1, 48
; LMULMAX2-RV64-NEXT: srli a1, a1, 56
; LMULMAX2-RV64-NEXT: or a1, a2, a1
; LMULMAX2-RV64-NEXT: sh a1, 12(sp)
; LMULMAX2-RV64-NEXT: vslidedown.vi v28, v26, 5
; LMULMAX2-RV64-NEXT: vmv.x.s a1, v28
; LMULMAX2-RV64-NEXT: slli a2, a1, 8
; LMULMAX2-RV64-NEXT: slliw a1, a1, 16
; LMULMAX2-RV64-NEXT: srliw a1, a1, 24
; LMULMAX2-RV64-NEXT: slli a1, a1, 48
; LMULMAX2-RV64-NEXT: srli a1, a1, 56
; LMULMAX2-RV64-NEXT: or a1, a2, a1
; LMULMAX2-RV64-NEXT: sh a1, 10(sp)
; LMULMAX2-RV64-NEXT: vslidedown.vi v28, v26, 4
; LMULMAX2-RV64-NEXT: vmv.x.s a1, v28
; LMULMAX2-RV64-NEXT: slli a2, a1, 8
; LMULMAX2-RV64-NEXT: slliw a1, a1, 16
; LMULMAX2-RV64-NEXT: srliw a1, a1, 24
; LMULMAX2-RV64-NEXT: slli a1, a1, 48
; LMULMAX2-RV64-NEXT: srli a1, a1, 56
; LMULMAX2-RV64-NEXT: or a1, a2, a1
; LMULMAX2-RV64-NEXT: sh a1, 8(sp)
; LMULMAX2-RV64-NEXT: vslidedown.vi v28, v26, 3
; LMULMAX2-RV64-NEXT: vmv.x.s a1, v28
; LMULMAX2-RV64-NEXT: slli a2, a1, 8
; LMULMAX2-RV64-NEXT: slliw a1, a1, 16
; LMULMAX2-RV64-NEXT: srliw a1, a1, 24
; LMULMAX2-RV64-NEXT: slli a1, a1, 48
; LMULMAX2-RV64-NEXT: srli a1, a1, 56
; LMULMAX2-RV64-NEXT: or a1, a2, a1
; LMULMAX2-RV64-NEXT: sh a1, 6(sp)
; LMULMAX2-RV64-NEXT: vslidedown.vi v28, v26, 2
; LMULMAX2-RV64-NEXT: vmv.x.s a1, v28
; LMULMAX2-RV64-NEXT: slli a2, a1, 8
; LMULMAX2-RV64-NEXT: slliw a1, a1, 16
; LMULMAX2-RV64-NEXT: srliw a1, a1, 24
; LMULMAX2-RV64-NEXT: slli a1, a1, 48
; LMULMAX2-RV64-NEXT: srli a1, a1, 56
; LMULMAX2-RV64-NEXT: or a1, a2, a1
; LMULMAX2-RV64-NEXT: sh a1, 4(sp)
; LMULMAX2-RV64-NEXT: vslidedown.vi v26, v26, 1
; LMULMAX2-RV64-NEXT: vmv.x.s a1, v26
; LMULMAX2-RV64-NEXT: slli a2, a1, 8
; LMULMAX2-RV64-NEXT: slliw a1, a1, 16
; LMULMAX2-RV64-NEXT: srliw a1, a1, 24
; LMULMAX2-RV64-NEXT: slli a1, a1, 48
; LMULMAX2-RV64-NEXT: srli a1, a1, 56
; LMULMAX2-RV64-NEXT: or a1, a2, a1
; LMULMAX2-RV64-NEXT: sh a1, 2(sp)
; LMULMAX2-RV64-NEXT: vsetivli a1, 16, e16,m2,ta,mu
@ -1217,113 +1217,113 @@ define void @bswap_v16i16(<16 x i16>* %x, <16 x i16>* %y) {
; LMULMAX1-RV64-NEXT: vle16.v v25, (a0)
; LMULMAX1-RV64-NEXT: vmv.x.s a2, v26
; LMULMAX1-RV64-NEXT: slli a3, a2, 8
; LMULMAX1-RV64-NEXT: slliw a2, a2, 16
; LMULMAX1-RV64-NEXT: srliw a2, a2, 24
; LMULMAX1-RV64-NEXT: slli a2, a2, 48
; LMULMAX1-RV64-NEXT: srli a2, a2, 56
; LMULMAX1-RV64-NEXT: or a2, a3, a2
; LMULMAX1-RV64-NEXT: sh a2, 16(sp)
; LMULMAX1-RV64-NEXT: vsetivli a2, 1, e16,m1,ta,mu
; LMULMAX1-RV64-NEXT: vslidedown.vi v27, v26, 7
; LMULMAX1-RV64-NEXT: vmv.x.s a2, v27
; LMULMAX1-RV64-NEXT: slli a3, a2, 8
; LMULMAX1-RV64-NEXT: slliw a2, a2, 16
; LMULMAX1-RV64-NEXT: srliw a2, a2, 24
; LMULMAX1-RV64-NEXT: slli a2, a2, 48
; LMULMAX1-RV64-NEXT: srli a2, a2, 56
; LMULMAX1-RV64-NEXT: or a2, a3, a2
; LMULMAX1-RV64-NEXT: sh a2, 30(sp)
; LMULMAX1-RV64-NEXT: vslidedown.vi v27, v26, 6
; LMULMAX1-RV64-NEXT: vmv.x.s a2, v27
; LMULMAX1-RV64-NEXT: slli a3, a2, 8
; LMULMAX1-RV64-NEXT: slliw a2, a2, 16
; LMULMAX1-RV64-NEXT: srliw a2, a2, 24
; LMULMAX1-RV64-NEXT: slli a2, a2, 48
; LMULMAX1-RV64-NEXT: srli a2, a2, 56
; LMULMAX1-RV64-NEXT: or a2, a3, a2
; LMULMAX1-RV64-NEXT: sh a2, 28(sp)
; LMULMAX1-RV64-NEXT: vslidedown.vi v27, v26, 5
; LMULMAX1-RV64-NEXT: vmv.x.s a2, v27
; LMULMAX1-RV64-NEXT: slli a3, a2, 8
; LMULMAX1-RV64-NEXT: slliw a2, a2, 16
; LMULMAX1-RV64-NEXT: srliw a2, a2, 24
; LMULMAX1-RV64-NEXT: slli a2, a2, 48
; LMULMAX1-RV64-NEXT: srli a2, a2, 56
; LMULMAX1-RV64-NEXT: or a2, a3, a2
; LMULMAX1-RV64-NEXT: sh a2, 26(sp)
; LMULMAX1-RV64-NEXT: vslidedown.vi v27, v26, 4
; LMULMAX1-RV64-NEXT: vmv.x.s a2, v27
; LMULMAX1-RV64-NEXT: slli a3, a2, 8
; LMULMAX1-RV64-NEXT: slliw a2, a2, 16
; LMULMAX1-RV64-NEXT: srliw a2, a2, 24
; LMULMAX1-RV64-NEXT: slli a2, a2, 48
; LMULMAX1-RV64-NEXT: srli a2, a2, 56
; LMULMAX1-RV64-NEXT: or a2, a3, a2
; LMULMAX1-RV64-NEXT: sh a2, 24(sp)
; LMULMAX1-RV64-NEXT: vslidedown.vi v27, v26, 3
; LMULMAX1-RV64-NEXT: vmv.x.s a2, v27
; LMULMAX1-RV64-NEXT: slli a3, a2, 8
; LMULMAX1-RV64-NEXT: slliw a2, a2, 16
; LMULMAX1-RV64-NEXT: srliw a2, a2, 24
; LMULMAX1-RV64-NEXT: slli a2, a2, 48
; LMULMAX1-RV64-NEXT: srli a2, a2, 56
; LMULMAX1-RV64-NEXT: or a2, a3, a2
; LMULMAX1-RV64-NEXT: sh a2, 22(sp)
; LMULMAX1-RV64-NEXT: vslidedown.vi v27, v26, 2
; LMULMAX1-RV64-NEXT: vmv.x.s a2, v27
; LMULMAX1-RV64-NEXT: slli a3, a2, 8
; LMULMAX1-RV64-NEXT: slliw a2, a2, 16
; LMULMAX1-RV64-NEXT: srliw a2, a2, 24
; LMULMAX1-RV64-NEXT: slli a2, a2, 48
; LMULMAX1-RV64-NEXT: srli a2, a2, 56
; LMULMAX1-RV64-NEXT: or a2, a3, a2
; LMULMAX1-RV64-NEXT: sh a2, 20(sp)
; LMULMAX1-RV64-NEXT: vslidedown.vi v26, v26, 1
; LMULMAX1-RV64-NEXT: vmv.x.s a2, v26
; LMULMAX1-RV64-NEXT: slli a3, a2, 8
; LMULMAX1-RV64-NEXT: slliw a2, a2, 16
; LMULMAX1-RV64-NEXT: srliw a2, a2, 24
; LMULMAX1-RV64-NEXT: slli a2, a2, 48
; LMULMAX1-RV64-NEXT: srli a2, a2, 56
; LMULMAX1-RV64-NEXT: or a2, a3, a2
; LMULMAX1-RV64-NEXT: sh a2, 18(sp)
; LMULMAX1-RV64-NEXT: vmv.x.s a2, v25
; LMULMAX1-RV64-NEXT: slli a3, a2, 8
; LMULMAX1-RV64-NEXT: slliw a2, a2, 16
; LMULMAX1-RV64-NEXT: srliw a2, a2, 24
; LMULMAX1-RV64-NEXT: slli a2, a2, 48
; LMULMAX1-RV64-NEXT: srli a2, a2, 56
; LMULMAX1-RV64-NEXT: or a2, a3, a2
; LMULMAX1-RV64-NEXT: sh a2, 0(sp)
; LMULMAX1-RV64-NEXT: vslidedown.vi v26, v25, 7
; LMULMAX1-RV64-NEXT: vmv.x.s a2, v26
; LMULMAX1-RV64-NEXT: slli a3, a2, 8
; LMULMAX1-RV64-NEXT: slliw a2, a2, 16
; LMULMAX1-RV64-NEXT: srliw a2, a2, 24
; LMULMAX1-RV64-NEXT: slli a2, a2, 48
; LMULMAX1-RV64-NEXT: srli a2, a2, 56
; LMULMAX1-RV64-NEXT: or a2, a3, a2
; LMULMAX1-RV64-NEXT: sh a2, 14(sp)
; LMULMAX1-RV64-NEXT: vslidedown.vi v26, v25, 6
; LMULMAX1-RV64-NEXT: vmv.x.s a2, v26
; LMULMAX1-RV64-NEXT: slli a3, a2, 8
; LMULMAX1-RV64-NEXT: slliw a2, a2, 16
; LMULMAX1-RV64-NEXT: srliw a2, a2, 24
; LMULMAX1-RV64-NEXT: slli a2, a2, 48
; LMULMAX1-RV64-NEXT: srli a2, a2, 56
; LMULMAX1-RV64-NEXT: or a2, a3, a2
; LMULMAX1-RV64-NEXT: sh a2, 12(sp)
; LMULMAX1-RV64-NEXT: vslidedown.vi v26, v25, 5
; LMULMAX1-RV64-NEXT: vmv.x.s a2, v26
; LMULMAX1-RV64-NEXT: slli a3, a2, 8
; LMULMAX1-RV64-NEXT: slliw a2, a2, 16
; LMULMAX1-RV64-NEXT: srliw a2, a2, 24
; LMULMAX1-RV64-NEXT: slli a2, a2, 48
; LMULMAX1-RV64-NEXT: srli a2, a2, 56
; LMULMAX1-RV64-NEXT: or a2, a3, a2
; LMULMAX1-RV64-NEXT: sh a2, 10(sp)
; LMULMAX1-RV64-NEXT: vslidedown.vi v26, v25, 4
; LMULMAX1-RV64-NEXT: vmv.x.s a2, v26
; LMULMAX1-RV64-NEXT: slli a3, a2, 8
; LMULMAX1-RV64-NEXT: slliw a2, a2, 16
; LMULMAX1-RV64-NEXT: srliw a2, a2, 24
; LMULMAX1-RV64-NEXT: slli a2, a2, 48
; LMULMAX1-RV64-NEXT: srli a2, a2, 56
; LMULMAX1-RV64-NEXT: or a2, a3, a2
; LMULMAX1-RV64-NEXT: sh a2, 8(sp)
; LMULMAX1-RV64-NEXT: vslidedown.vi v26, v25, 3
; LMULMAX1-RV64-NEXT: vmv.x.s a2, v26
; LMULMAX1-RV64-NEXT: slli a3, a2, 8
; LMULMAX1-RV64-NEXT: slliw a2, a2, 16
; LMULMAX1-RV64-NEXT: srliw a2, a2, 24
; LMULMAX1-RV64-NEXT: slli a2, a2, 48
; LMULMAX1-RV64-NEXT: srli a2, a2, 56
; LMULMAX1-RV64-NEXT: or a2, a3, a2
; LMULMAX1-RV64-NEXT: sh a2, 6(sp)
; LMULMAX1-RV64-NEXT: vslidedown.vi v26, v25, 2
; LMULMAX1-RV64-NEXT: vmv.x.s a2, v26
; LMULMAX1-RV64-NEXT: slli a3, a2, 8
; LMULMAX1-RV64-NEXT: slliw a2, a2, 16
; LMULMAX1-RV64-NEXT: srliw a2, a2, 24
; LMULMAX1-RV64-NEXT: slli a2, a2, 48
; LMULMAX1-RV64-NEXT: srli a2, a2, 56
; LMULMAX1-RV64-NEXT: or a2, a3, a2
; LMULMAX1-RV64-NEXT: sh a2, 4(sp)
; LMULMAX1-RV64-NEXT: vslidedown.vi v25, v25, 1
; LMULMAX1-RV64-NEXT: vmv.x.s a2, v25
; LMULMAX1-RV64-NEXT: slli a3, a2, 8
; LMULMAX1-RV64-NEXT: slliw a2, a2, 16
; LMULMAX1-RV64-NEXT: srliw a2, a2, 24
; LMULMAX1-RV64-NEXT: slli a2, a2, 48
; LMULMAX1-RV64-NEXT: srli a2, a2, 56
; LMULMAX1-RV64-NEXT: or a2, a3, a2
; LMULMAX1-RV64-NEXT: sh a2, 2(sp)
; LMULMAX1-RV64-NEXT: vsetivli a2, 8, e16,m1,ta,mu