From bb4a57c6712aa26500c7c1021eb1a1141c1eb237 Mon Sep 17 00:00:00 2001 From: Fraser Cormack Date: Tue, 8 Jun 2021 11:05:09 +0100 Subject: [PATCH] [RISCV] Add a test case showing inefficient vector codegen --- .../rvv/fixed-vectors-bitcast-large-vector.ll | 1206 +++++++++++++++++ 1 file changed, 1206 insertions(+) create mode 100644 test/CodeGen/RISCV/rvv/fixed-vectors-bitcast-large-vector.ll diff --git a/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast-large-vector.ll b/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast-large-vector.ll new file mode 100644 index 00000000000..58cd8d1c6af --- /dev/null +++ b/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast-large-vector.ll @@ -0,0 +1,1206 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=256 < %s | FileCheck %s --check-prefix=VLEN256 +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=512 < %s | FileCheck %s --check-prefix=VLEN512 + +; FIXME: A larger VLEN is producing worse code. +; FIXME: v256i16 is legal so v512i8 should be too since they're the same size. +define <512 x i8> @bitcast_1024B(<256 x i16> %a, <512 x i8> %b) { +; VLEN256-LABEL: bitcast_1024B: +; VLEN256: # %bb.0: +; VLEN256-NEXT: addi a1, a0, 256 +; VLEN256-NEXT: addi a2, zero, 256 +; VLEN256-NEXT: vsetvli zero, a2, e8,m8,ta,mu +; VLEN256-NEXT: vle8.v v24, (a0) +; VLEN256-NEXT: vle8.v v0, (a1) +; VLEN256-NEXT: vadd.vv v8, v24, v8 +; VLEN256-NEXT: vadd.vv v16, v0, v16 +; VLEN256-NEXT: ret +; +; VLEN512-LABEL: bitcast_1024B: +; VLEN512: # %bb.0: +; VLEN512-NEXT: addi sp, sp, -1024 +; VLEN512-NEXT: .cfi_def_cfa_offset 1024 +; VLEN512-NEXT: sd ra, 1016(sp) # 8-byte Folded Spill +; VLEN512-NEXT: sd s0, 1008(sp) # 8-byte Folded Spill +; VLEN512-NEXT: sd s1, 1000(sp) # 8-byte Folded Spill +; VLEN512-NEXT: sd s2, 992(sp) # 8-byte Folded Spill +; VLEN512-NEXT: sd s3, 984(sp) # 8-byte Folded Spill +; VLEN512-NEXT: sd s4, 976(sp) # 8-byte Folded Spill +; VLEN512-NEXT: sd s5, 968(sp) # 8-byte Folded Spill +; VLEN512-NEXT: sd s6, 960(sp) # 8-byte Folded Spill +; VLEN512-NEXT: sd s7, 952(sp) # 8-byte Folded Spill +; VLEN512-NEXT: sd s8, 944(sp) # 8-byte Folded Spill +; VLEN512-NEXT: sd s9, 936(sp) # 8-byte Folded Spill +; VLEN512-NEXT: sd s10, 928(sp) # 8-byte Folded Spill +; VLEN512-NEXT: sd s11, 920(sp) # 8-byte Folded Spill +; VLEN512-NEXT: .cfi_offset ra, -8 +; VLEN512-NEXT: .cfi_offset s0, -16 +; VLEN512-NEXT: .cfi_offset s1, -24 +; VLEN512-NEXT: .cfi_offset s2, -32 +; VLEN512-NEXT: .cfi_offset s3, -40 +; VLEN512-NEXT: .cfi_offset s4, -48 +; VLEN512-NEXT: .cfi_offset s5, -56 +; VLEN512-NEXT: .cfi_offset s6, -64 +; VLEN512-NEXT: .cfi_offset s7, -72 +; VLEN512-NEXT: .cfi_offset s8, -80 +; VLEN512-NEXT: .cfi_offset s9, -88 +; VLEN512-NEXT: .cfi_offset s10, -96 +; VLEN512-NEXT: .cfi_offset s11, -104 +; VLEN512-NEXT: addi s0, sp, 1024 +; VLEN512-NEXT: .cfi_def_cfa s0, 0 +; VLEN512-NEXT: csrr a0, vlenb +; VLEN512-NEXT: slli a0, a0, 3 +; VLEN512-NEXT: sub sp, sp, a0 +; VLEN512-NEXT: andi sp, sp, -256 +; VLEN512-NEXT: addi a0, zero, 32 +; VLEN512-NEXT: vsetivli zero, 1, e64,m8,ta,mu +; VLEN512-NEXT: vslidedown.vx v24, v8, a0 +; VLEN512-NEXT: vmv.x.s a6, v24 +; VLEN512-NEXT: addi a0, zero, 33 +; VLEN512-NEXT: vslidedown.vx v24, v8, a0 +; VLEN512-NEXT: addi a0, sp, 920 +; VLEN512-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; VLEN512-NEXT: addi a0, zero, 34 +; VLEN512-NEXT: addi a1, zero, 35 +; VLEN512-NEXT: vslidedown.vx v0, v8, a1 +; VLEN512-NEXT: vmv.x.s a7, v0 +; VLEN512-NEXT: addi a1, zero, 36 +; VLEN512-NEXT: vslidedown.vx v0, v8, a1 +; VLEN512-NEXT: vmv.x.s t0, v0 +; VLEN512-NEXT: addi a1, zero, 37 +; VLEN512-NEXT: vslidedown.vx v0, v8, a1 +; VLEN512-NEXT: vmv.x.s t1, v0 +; VLEN512-NEXT: addi a1, zero, 38 +; VLEN512-NEXT: vslidedown.vx v0, v8, a1 +; VLEN512-NEXT: vmv.x.s t2, v0 +; VLEN512-NEXT: addi a1, zero, 39 +; VLEN512-NEXT: vslidedown.vx v0, v8, a1 +; VLEN512-NEXT: vmv.x.s t3, v0 +; VLEN512-NEXT: addi a1, zero, 40 +; VLEN512-NEXT: vslidedown.vx v0, v8, a1 +; VLEN512-NEXT: vmv.x.s t4, v0 +; VLEN512-NEXT: addi a1, zero, 41 +; VLEN512-NEXT: vslidedown.vx v0, v8, a1 +; VLEN512-NEXT: vmv.x.s t5, v0 +; VLEN512-NEXT: addi a1, zero, 42 +; VLEN512-NEXT: vslidedown.vx v0, v8, a1 +; VLEN512-NEXT: vmv.x.s t6, v0 +; VLEN512-NEXT: addi a1, zero, 43 +; VLEN512-NEXT: vslidedown.vx v0, v8, a1 +; VLEN512-NEXT: vmv.x.s s2, v0 +; VLEN512-NEXT: addi a1, zero, 44 +; VLEN512-NEXT: vslidedown.vx v0, v8, a1 +; VLEN512-NEXT: vmv.x.s s3, v0 +; VLEN512-NEXT: addi a1, zero, 45 +; VLEN512-NEXT: vslidedown.vx v0, v8, a1 +; VLEN512-NEXT: vmv.x.s s4, v0 +; VLEN512-NEXT: addi a1, zero, 46 +; VLEN512-NEXT: vslidedown.vx v0, v8, a1 +; VLEN512-NEXT: vmv.x.s s5, v0 +; VLEN512-NEXT: addi a1, zero, 47 +; VLEN512-NEXT: vslidedown.vx v0, v8, a1 +; VLEN512-NEXT: vmv.x.s s6, v0 +; VLEN512-NEXT: addi a1, zero, 48 +; VLEN512-NEXT: vslidedown.vx v0, v8, a1 +; VLEN512-NEXT: vmv.x.s s7, v0 +; VLEN512-NEXT: addi a1, zero, 49 +; VLEN512-NEXT: vslidedown.vx v0, v8, a1 +; VLEN512-NEXT: vmv.x.s s8, v0 +; VLEN512-NEXT: addi a1, zero, 50 +; VLEN512-NEXT: vslidedown.vx v0, v8, a1 +; VLEN512-NEXT: vmv.x.s s9, v0 +; VLEN512-NEXT: addi a1, zero, 51 +; VLEN512-NEXT: vslidedown.vx v0, v8, a1 +; VLEN512-NEXT: vmv.x.s s10, v0 +; VLEN512-NEXT: addi a1, zero, 52 +; VLEN512-NEXT: vslidedown.vx v0, v8, a1 +; VLEN512-NEXT: vmv.x.s s11, v0 +; VLEN512-NEXT: addi a1, zero, 53 +; VLEN512-NEXT: vslidedown.vx v0, v8, a1 +; VLEN512-NEXT: vmv.x.s ra, v0 +; VLEN512-NEXT: addi a1, zero, 54 +; VLEN512-NEXT: vslidedown.vx v0, v8, a1 +; VLEN512-NEXT: vmv.x.s a1, v0 +; VLEN512-NEXT: addi a2, zero, 55 +; VLEN512-NEXT: vslidedown.vx v0, v8, a2 +; VLEN512-NEXT: vmv.x.s a2, v0 +; VLEN512-NEXT: addi a3, zero, 56 +; VLEN512-NEXT: vslidedown.vx v0, v8, a3 +; VLEN512-NEXT: vmv.x.s s1, v0 +; VLEN512-NEXT: addi a3, zero, 57 +; VLEN512-NEXT: vslidedown.vx v0, v8, a3 +; VLEN512-NEXT: vmv.x.s a3, v0 +; VLEN512-NEXT: addi a4, zero, 58 +; VLEN512-NEXT: vslidedown.vx v0, v8, a4 +; VLEN512-NEXT: vmv.x.s a4, v0 +; VLEN512-NEXT: addi a5, zero, 63 +; VLEN512-NEXT: vslidedown.vx v0, v8, a5 +; VLEN512-NEXT: vmv.x.s a5, v0 +; VLEN512-NEXT: vslidedown.vx v0, v8, a0 +; VLEN512-NEXT: srli a0, a5, 56 +; VLEN512-NEXT: sb a0, 511(sp) +; VLEN512-NEXT: srli a0, a5, 48 +; VLEN512-NEXT: sb a0, 510(sp) +; VLEN512-NEXT: srli a0, a5, 40 +; VLEN512-NEXT: sb a0, 509(sp) +; VLEN512-NEXT: srli a0, a5, 32 +; VLEN512-NEXT: sb a0, 508(sp) +; VLEN512-NEXT: srli a0, a5, 24 +; VLEN512-NEXT: sb a0, 507(sp) +; VLEN512-NEXT: srli a0, a5, 16 +; VLEN512-NEXT: sb a0, 506(sp) +; VLEN512-NEXT: addi a0, zero, 62 +; VLEN512-NEXT: vslidedown.vx v24, v8, a0 +; VLEN512-NEXT: vmv.x.s a0, v24 +; VLEN512-NEXT: sb a5, 504(sp) +; VLEN512-NEXT: srli a5, a5, 8 +; VLEN512-NEXT: sb a5, 505(sp) +; VLEN512-NEXT: srli a5, a0, 56 +; VLEN512-NEXT: sb a5, 503(sp) +; VLEN512-NEXT: srli a5, a0, 48 +; VLEN512-NEXT: sb a5, 502(sp) +; VLEN512-NEXT: srli a5, a0, 40 +; VLEN512-NEXT: sb a5, 501(sp) +; VLEN512-NEXT: srli a5, a0, 32 +; VLEN512-NEXT: sb a5, 500(sp) +; VLEN512-NEXT: srli a5, a0, 24 +; VLEN512-NEXT: sb a5, 499(sp) +; VLEN512-NEXT: srli a5, a0, 16 +; VLEN512-NEXT: sb a5, 498(sp) +; VLEN512-NEXT: addi a5, zero, 61 +; VLEN512-NEXT: vslidedown.vx v24, v8, a5 +; VLEN512-NEXT: vmv.x.s a5, v24 +; VLEN512-NEXT: sb a0, 496(sp) +; VLEN512-NEXT: srli a0, a0, 8 +; VLEN512-NEXT: sb a0, 497(sp) +; VLEN512-NEXT: srli a0, a5, 56 +; VLEN512-NEXT: sb a0, 495(sp) +; VLEN512-NEXT: srli a0, a5, 48 +; VLEN512-NEXT: sb a0, 494(sp) +; VLEN512-NEXT: srli a0, a5, 40 +; VLEN512-NEXT: sb a0, 493(sp) +; VLEN512-NEXT: srli a0, a5, 32 +; VLEN512-NEXT: sb a0, 492(sp) +; VLEN512-NEXT: srli a0, a5, 24 +; VLEN512-NEXT: sb a0, 491(sp) +; VLEN512-NEXT: srli a0, a5, 16 +; VLEN512-NEXT: sb a0, 490(sp) +; VLEN512-NEXT: addi a0, zero, 60 +; VLEN512-NEXT: vslidedown.vx v24, v8, a0 +; VLEN512-NEXT: vmv.x.s a0, v24 +; VLEN512-NEXT: sb a5, 488(sp) +; VLEN512-NEXT: srli a5, a5, 8 +; VLEN512-NEXT: sb a5, 489(sp) +; VLEN512-NEXT: srli a5, a0, 56 +; VLEN512-NEXT: sb a5, 487(sp) +; VLEN512-NEXT: srli a5, a0, 48 +; VLEN512-NEXT: sb a5, 486(sp) +; VLEN512-NEXT: srli a5, a0, 40 +; VLEN512-NEXT: sb a5, 485(sp) +; VLEN512-NEXT: srli a5, a0, 32 +; VLEN512-NEXT: sb a5, 484(sp) +; VLEN512-NEXT: srli a5, a0, 24 +; VLEN512-NEXT: sb a5, 483(sp) +; VLEN512-NEXT: srli a5, a0, 16 +; VLEN512-NEXT: sb a5, 482(sp) +; VLEN512-NEXT: addi a5, zero, 59 +; VLEN512-NEXT: vslidedown.vx v24, v8, a5 +; VLEN512-NEXT: vmv.x.s a5, v24 +; VLEN512-NEXT: sb a0, 480(sp) +; VLEN512-NEXT: srli a0, a0, 8 +; VLEN512-NEXT: sb a0, 481(sp) +; VLEN512-NEXT: srli a0, a5, 56 +; VLEN512-NEXT: sb a0, 479(sp) +; VLEN512-NEXT: srli a0, a5, 48 +; VLEN512-NEXT: sb a0, 478(sp) +; VLEN512-NEXT: srli a0, a5, 40 +; VLEN512-NEXT: sb a0, 477(sp) +; VLEN512-NEXT: srli a0, a5, 32 +; VLEN512-NEXT: sb a0, 476(sp) +; VLEN512-NEXT: srli a0, a5, 24 +; VLEN512-NEXT: sb a0, 475(sp) +; VLEN512-NEXT: srli a0, a5, 16 +; VLEN512-NEXT: sb a0, 474(sp) +; VLEN512-NEXT: sb a5, 472(sp) +; VLEN512-NEXT: srli a0, a5, 8 +; VLEN512-NEXT: sb a0, 473(sp) +; VLEN512-NEXT: srli a0, a4, 56 +; VLEN512-NEXT: sb a0, 471(sp) +; VLEN512-NEXT: srli a0, a4, 48 +; VLEN512-NEXT: sb a0, 470(sp) +; VLEN512-NEXT: srli a0, a4, 40 +; VLEN512-NEXT: sb a0, 469(sp) +; VLEN512-NEXT: srli a0, a4, 32 +; VLEN512-NEXT: sb a0, 468(sp) +; VLEN512-NEXT: srli a0, a4, 24 +; VLEN512-NEXT: sb a0, 467(sp) +; VLEN512-NEXT: srli a0, a4, 16 +; VLEN512-NEXT: sb a0, 466(sp) +; VLEN512-NEXT: sb a4, 464(sp) +; VLEN512-NEXT: srli a0, a4, 8 +; VLEN512-NEXT: sb a0, 465(sp) +; VLEN512-NEXT: srli a0, a3, 56 +; VLEN512-NEXT: sb a0, 463(sp) +; VLEN512-NEXT: srli a0, a3, 48 +; VLEN512-NEXT: sb a0, 462(sp) +; VLEN512-NEXT: srli a0, a3, 40 +; VLEN512-NEXT: sb a0, 461(sp) +; VLEN512-NEXT: srli a0, a3, 32 +; VLEN512-NEXT: sb a0, 460(sp) +; VLEN512-NEXT: srli a0, a3, 24 +; VLEN512-NEXT: sb a0, 459(sp) +; VLEN512-NEXT: srli a0, a3, 16 +; VLEN512-NEXT: sb a0, 458(sp) +; VLEN512-NEXT: sb a3, 456(sp) +; VLEN512-NEXT: srli a0, a3, 8 +; VLEN512-NEXT: sb a0, 457(sp) +; VLEN512-NEXT: srli a0, s1, 56 +; VLEN512-NEXT: sb a0, 455(sp) +; VLEN512-NEXT: srli a0, s1, 48 +; VLEN512-NEXT: sb a0, 454(sp) +; VLEN512-NEXT: srli a0, s1, 40 +; VLEN512-NEXT: sb a0, 453(sp) +; VLEN512-NEXT: srli a0, s1, 32 +; VLEN512-NEXT: sb a0, 452(sp) +; VLEN512-NEXT: srli a0, s1, 24 +; VLEN512-NEXT: sb a0, 451(sp) +; VLEN512-NEXT: srli a0, s1, 16 +; VLEN512-NEXT: sb a0, 450(sp) +; VLEN512-NEXT: sb s1, 448(sp) +; VLEN512-NEXT: srli a0, s1, 8 +; VLEN512-NEXT: sb a0, 449(sp) +; VLEN512-NEXT: srli a0, a2, 56 +; VLEN512-NEXT: sb a0, 447(sp) +; VLEN512-NEXT: srli a0, a2, 48 +; VLEN512-NEXT: sb a0, 446(sp) +; VLEN512-NEXT: srli a0, a2, 40 +; VLEN512-NEXT: sb a0, 445(sp) +; VLEN512-NEXT: srli a0, a2, 32 +; VLEN512-NEXT: sb a0, 444(sp) +; VLEN512-NEXT: srli a0, a2, 24 +; VLEN512-NEXT: sb a0, 443(sp) +; VLEN512-NEXT: srli a0, a2, 16 +; VLEN512-NEXT: sb a0, 442(sp) +; VLEN512-NEXT: sb a2, 440(sp) +; VLEN512-NEXT: srli a0, a2, 8 +; VLEN512-NEXT: sb a0, 441(sp) +; VLEN512-NEXT: srli a0, a1, 56 +; VLEN512-NEXT: sb a0, 439(sp) +; VLEN512-NEXT: srli a0, a1, 48 +; VLEN512-NEXT: sb a0, 438(sp) +; VLEN512-NEXT: srli a0, a1, 40 +; VLEN512-NEXT: sb a0, 437(sp) +; VLEN512-NEXT: srli a0, a1, 32 +; VLEN512-NEXT: sb a0, 436(sp) +; VLEN512-NEXT: srli a0, a1, 24 +; VLEN512-NEXT: sb a0, 435(sp) +; VLEN512-NEXT: srli a0, a1, 16 +; VLEN512-NEXT: sb a0, 434(sp) +; VLEN512-NEXT: sb a1, 432(sp) +; VLEN512-NEXT: srli a0, a1, 8 +; VLEN512-NEXT: sb a0, 433(sp) +; VLEN512-NEXT: srli a0, ra, 56 +; VLEN512-NEXT: sb a0, 431(sp) +; VLEN512-NEXT: srli a0, ra, 48 +; VLEN512-NEXT: sb a0, 430(sp) +; VLEN512-NEXT: srli a0, ra, 40 +; VLEN512-NEXT: sb a0, 429(sp) +; VLEN512-NEXT: srli a0, ra, 32 +; VLEN512-NEXT: sb a0, 428(sp) +; VLEN512-NEXT: srli a0, ra, 24 +; VLEN512-NEXT: sb a0, 427(sp) +; VLEN512-NEXT: srli a0, ra, 16 +; VLEN512-NEXT: sb a0, 426(sp) +; VLEN512-NEXT: sb ra, 424(sp) +; VLEN512-NEXT: srli a0, ra, 8 +; VLEN512-NEXT: sb a0, 425(sp) +; VLEN512-NEXT: srli a0, s11, 56 +; VLEN512-NEXT: sb a0, 423(sp) +; VLEN512-NEXT: srli a0, s11, 48 +; VLEN512-NEXT: sb a0, 422(sp) +; VLEN512-NEXT: srli a0, s11, 40 +; VLEN512-NEXT: sb a0, 421(sp) +; VLEN512-NEXT: srli a0, s11, 32 +; VLEN512-NEXT: sb a0, 420(sp) +; VLEN512-NEXT: srli a0, s11, 24 +; VLEN512-NEXT: sb a0, 419(sp) +; VLEN512-NEXT: srli a0, s11, 16 +; VLEN512-NEXT: sb a0, 418(sp) +; VLEN512-NEXT: sb s11, 416(sp) +; VLEN512-NEXT: srli a0, s11, 8 +; VLEN512-NEXT: sb a0, 417(sp) +; VLEN512-NEXT: srli a0, s10, 56 +; VLEN512-NEXT: sb a0, 415(sp) +; VLEN512-NEXT: srli a0, s10, 48 +; VLEN512-NEXT: sb a0, 414(sp) +; VLEN512-NEXT: srli a0, s10, 40 +; VLEN512-NEXT: sb a0, 413(sp) +; VLEN512-NEXT: srli a0, s10, 32 +; VLEN512-NEXT: sb a0, 412(sp) +; VLEN512-NEXT: srli a0, s10, 24 +; VLEN512-NEXT: sb a0, 411(sp) +; VLEN512-NEXT: srli a0, s10, 16 +; VLEN512-NEXT: sb a0, 410(sp) +; VLEN512-NEXT: sb s10, 408(sp) +; VLEN512-NEXT: srli a0, s10, 8 +; VLEN512-NEXT: sb a0, 409(sp) +; VLEN512-NEXT: srli a0, s9, 56 +; VLEN512-NEXT: sb a0, 407(sp) +; VLEN512-NEXT: srli a0, s9, 48 +; VLEN512-NEXT: sb a0, 406(sp) +; VLEN512-NEXT: srli a0, s9, 40 +; VLEN512-NEXT: sb a0, 405(sp) +; VLEN512-NEXT: srli a0, s9, 32 +; VLEN512-NEXT: sb a0, 404(sp) +; VLEN512-NEXT: srli a0, s9, 24 +; VLEN512-NEXT: sb a0, 403(sp) +; VLEN512-NEXT: srli a0, s9, 16 +; VLEN512-NEXT: sb a0, 402(sp) +; VLEN512-NEXT: sb s9, 400(sp) +; VLEN512-NEXT: srli a0, s9, 8 +; VLEN512-NEXT: sb a0, 401(sp) +; VLEN512-NEXT: srli a0, s8, 56 +; VLEN512-NEXT: sb a0, 399(sp) +; VLEN512-NEXT: srli a0, s8, 48 +; VLEN512-NEXT: sb a0, 398(sp) +; VLEN512-NEXT: srli a0, s8, 40 +; VLEN512-NEXT: sb a0, 397(sp) +; VLEN512-NEXT: srli a0, s8, 32 +; VLEN512-NEXT: sb a0, 396(sp) +; VLEN512-NEXT: srli a0, s8, 24 +; VLEN512-NEXT: sb a0, 395(sp) +; VLEN512-NEXT: srli a0, s8, 16 +; VLEN512-NEXT: sb a0, 394(sp) +; VLEN512-NEXT: sb s8, 392(sp) +; VLEN512-NEXT: srli a0, s8, 8 +; VLEN512-NEXT: sb a0, 393(sp) +; VLEN512-NEXT: srli a0, s7, 56 +; VLEN512-NEXT: sb a0, 391(sp) +; VLEN512-NEXT: srli a0, s7, 48 +; VLEN512-NEXT: sb a0, 390(sp) +; VLEN512-NEXT: srli a0, s7, 40 +; VLEN512-NEXT: sb a0, 389(sp) +; VLEN512-NEXT: srli a0, s7, 32 +; VLEN512-NEXT: sb a0, 388(sp) +; VLEN512-NEXT: srli a0, s7, 24 +; VLEN512-NEXT: sb a0, 387(sp) +; VLEN512-NEXT: srli a0, s7, 16 +; VLEN512-NEXT: sb a0, 386(sp) +; VLEN512-NEXT: sb s7, 384(sp) +; VLEN512-NEXT: srli a0, s7, 8 +; VLEN512-NEXT: sb a0, 385(sp) +; VLEN512-NEXT: srli a0, s6, 56 +; VLEN512-NEXT: sb a0, 383(sp) +; VLEN512-NEXT: srli a0, s6, 48 +; VLEN512-NEXT: sb a0, 382(sp) +; VLEN512-NEXT: srli a0, s6, 40 +; VLEN512-NEXT: sb a0, 381(sp) +; VLEN512-NEXT: srli a0, s6, 32 +; VLEN512-NEXT: sb a0, 380(sp) +; VLEN512-NEXT: srli a0, s6, 24 +; VLEN512-NEXT: sb a0, 379(sp) +; VLEN512-NEXT: srli a0, s6, 16 +; VLEN512-NEXT: sb a0, 378(sp) +; VLEN512-NEXT: sb s6, 376(sp) +; VLEN512-NEXT: srli a0, s6, 8 +; VLEN512-NEXT: sb a0, 377(sp) +; VLEN512-NEXT: srli a0, s5, 56 +; VLEN512-NEXT: sb a0, 375(sp) +; VLEN512-NEXT: srli a0, s5, 48 +; VLEN512-NEXT: sb a0, 374(sp) +; VLEN512-NEXT: srli a0, s5, 40 +; VLEN512-NEXT: sb a0, 373(sp) +; VLEN512-NEXT: srli a0, s5, 32 +; VLEN512-NEXT: sb a0, 372(sp) +; VLEN512-NEXT: srli a0, s5, 24 +; VLEN512-NEXT: sb a0, 371(sp) +; VLEN512-NEXT: srli a0, s5, 16 +; VLEN512-NEXT: sb a0, 370(sp) +; VLEN512-NEXT: sb s5, 368(sp) +; VLEN512-NEXT: srli a0, s5, 8 +; VLEN512-NEXT: sb a0, 369(sp) +; VLEN512-NEXT: srli a0, s4, 56 +; VLEN512-NEXT: sb a0, 367(sp) +; VLEN512-NEXT: srli a0, s4, 48 +; VLEN512-NEXT: sb a0, 366(sp) +; VLEN512-NEXT: srli a0, s4, 40 +; VLEN512-NEXT: sb a0, 365(sp) +; VLEN512-NEXT: srli a0, s4, 32 +; VLEN512-NEXT: sb a0, 364(sp) +; VLEN512-NEXT: srli a0, s4, 24 +; VLEN512-NEXT: sb a0, 363(sp) +; VLEN512-NEXT: srli a0, s4, 16 +; VLEN512-NEXT: sb a0, 362(sp) +; VLEN512-NEXT: sb s4, 360(sp) +; VLEN512-NEXT: srli a0, s4, 8 +; VLEN512-NEXT: sb a0, 361(sp) +; VLEN512-NEXT: srli a0, s3, 56 +; VLEN512-NEXT: sb a0, 359(sp) +; VLEN512-NEXT: srli a0, s3, 48 +; VLEN512-NEXT: sb a0, 358(sp) +; VLEN512-NEXT: srli a0, s3, 40 +; VLEN512-NEXT: sb a0, 357(sp) +; VLEN512-NEXT: srli a0, s3, 32 +; VLEN512-NEXT: sb a0, 356(sp) +; VLEN512-NEXT: srli a0, s3, 24 +; VLEN512-NEXT: sb a0, 355(sp) +; VLEN512-NEXT: srli a0, s3, 16 +; VLEN512-NEXT: sb a0, 354(sp) +; VLEN512-NEXT: sb s3, 352(sp) +; VLEN512-NEXT: srli a0, s3, 8 +; VLEN512-NEXT: sb a0, 353(sp) +; VLEN512-NEXT: srli a0, s2, 56 +; VLEN512-NEXT: sb a0, 351(sp) +; VLEN512-NEXT: srli a0, s2, 48 +; VLEN512-NEXT: sb a0, 350(sp) +; VLEN512-NEXT: srli a0, s2, 40 +; VLEN512-NEXT: sb a0, 349(sp) +; VLEN512-NEXT: srli a0, s2, 32 +; VLEN512-NEXT: sb a0, 348(sp) +; VLEN512-NEXT: srli a0, s2, 24 +; VLEN512-NEXT: sb a0, 347(sp) +; VLEN512-NEXT: srli a0, s2, 16 +; VLEN512-NEXT: sb a0, 346(sp) +; VLEN512-NEXT: sb s2, 344(sp) +; VLEN512-NEXT: srli a0, s2, 8 +; VLEN512-NEXT: sb a0, 345(sp) +; VLEN512-NEXT: srli a0, t6, 56 +; VLEN512-NEXT: sb a0, 343(sp) +; VLEN512-NEXT: srli a0, t6, 48 +; VLEN512-NEXT: sb a0, 342(sp) +; VLEN512-NEXT: srli a0, t6, 40 +; VLEN512-NEXT: sb a0, 341(sp) +; VLEN512-NEXT: srli a0, t6, 32 +; VLEN512-NEXT: sb a0, 340(sp) +; VLEN512-NEXT: srli a0, t6, 24 +; VLEN512-NEXT: sb a0, 339(sp) +; VLEN512-NEXT: srli a0, t6, 16 +; VLEN512-NEXT: sb a0, 338(sp) +; VLEN512-NEXT: sb t6, 336(sp) +; VLEN512-NEXT: srli a0, t6, 8 +; VLEN512-NEXT: sb a0, 337(sp) +; VLEN512-NEXT: srli a0, t5, 56 +; VLEN512-NEXT: sb a0, 335(sp) +; VLEN512-NEXT: srli a0, t5, 48 +; VLEN512-NEXT: sb a0, 334(sp) +; VLEN512-NEXT: srli a0, t5, 40 +; VLEN512-NEXT: sb a0, 333(sp) +; VLEN512-NEXT: srli a0, t5, 32 +; VLEN512-NEXT: sb a0, 332(sp) +; VLEN512-NEXT: srli a0, t5, 24 +; VLEN512-NEXT: sb a0, 331(sp) +; VLEN512-NEXT: srli a0, t5, 16 +; VLEN512-NEXT: sb a0, 330(sp) +; VLEN512-NEXT: sb t5, 328(sp) +; VLEN512-NEXT: srli a0, t5, 8 +; VLEN512-NEXT: sb a0, 329(sp) +; VLEN512-NEXT: srli a0, t4, 56 +; VLEN512-NEXT: sb a0, 327(sp) +; VLEN512-NEXT: srli a0, t4, 48 +; VLEN512-NEXT: sb a0, 326(sp) +; VLEN512-NEXT: srli a0, t4, 40 +; VLEN512-NEXT: sb a0, 325(sp) +; VLEN512-NEXT: srli a0, t4, 32 +; VLEN512-NEXT: sb a0, 324(sp) +; VLEN512-NEXT: srli a0, t4, 24 +; VLEN512-NEXT: sb a0, 323(sp) +; VLEN512-NEXT: srli a0, t4, 16 +; VLEN512-NEXT: sb a0, 322(sp) +; VLEN512-NEXT: sb t4, 320(sp) +; VLEN512-NEXT: srli a0, t4, 8 +; VLEN512-NEXT: sb a0, 321(sp) +; VLEN512-NEXT: srli a0, t3, 56 +; VLEN512-NEXT: sb a0, 319(sp) +; VLEN512-NEXT: srli a0, t3, 48 +; VLEN512-NEXT: sb a0, 318(sp) +; VLEN512-NEXT: srli a0, t3, 40 +; VLEN512-NEXT: sb a0, 317(sp) +; VLEN512-NEXT: srli a0, t3, 32 +; VLEN512-NEXT: sb a0, 316(sp) +; VLEN512-NEXT: srli a0, t3, 24 +; VLEN512-NEXT: sb a0, 315(sp) +; VLEN512-NEXT: srli a0, t3, 16 +; VLEN512-NEXT: sb a0, 314(sp) +; VLEN512-NEXT: sb t3, 312(sp) +; VLEN512-NEXT: srli a0, t3, 8 +; VLEN512-NEXT: sb a0, 313(sp) +; VLEN512-NEXT: srli a0, t2, 56 +; VLEN512-NEXT: sb a0, 311(sp) +; VLEN512-NEXT: srli a0, t2, 48 +; VLEN512-NEXT: sb a0, 310(sp) +; VLEN512-NEXT: srli a0, t2, 40 +; VLEN512-NEXT: sb a0, 309(sp) +; VLEN512-NEXT: srli a0, t2, 32 +; VLEN512-NEXT: sb a0, 308(sp) +; VLEN512-NEXT: srli a0, t2, 24 +; VLEN512-NEXT: sb a0, 307(sp) +; VLEN512-NEXT: srli a0, t2, 16 +; VLEN512-NEXT: sb a0, 306(sp) +; VLEN512-NEXT: sb t2, 304(sp) +; VLEN512-NEXT: srli a0, t2, 8 +; VLEN512-NEXT: sb a0, 305(sp) +; VLEN512-NEXT: srli a0, t1, 56 +; VLEN512-NEXT: sb a0, 303(sp) +; VLEN512-NEXT: srli a0, t1, 48 +; VLEN512-NEXT: sb a0, 302(sp) +; VLEN512-NEXT: srli a0, t1, 40 +; VLEN512-NEXT: sb a0, 301(sp) +; VLEN512-NEXT: srli a0, t1, 32 +; VLEN512-NEXT: sb a0, 300(sp) +; VLEN512-NEXT: srli a0, t1, 24 +; VLEN512-NEXT: sb a0, 299(sp) +; VLEN512-NEXT: srli a0, t1, 16 +; VLEN512-NEXT: sb a0, 298(sp) +; VLEN512-NEXT: sb t1, 296(sp) +; VLEN512-NEXT: srli a0, t1, 8 +; VLEN512-NEXT: sb a0, 297(sp) +; VLEN512-NEXT: srli a0, t0, 56 +; VLEN512-NEXT: sb a0, 295(sp) +; VLEN512-NEXT: srli a0, t0, 48 +; VLEN512-NEXT: sb a0, 294(sp) +; VLEN512-NEXT: srli a0, t0, 40 +; VLEN512-NEXT: sb a0, 293(sp) +; VLEN512-NEXT: srli a0, t0, 32 +; VLEN512-NEXT: sb a0, 292(sp) +; VLEN512-NEXT: srli a0, t0, 24 +; VLEN512-NEXT: sb a0, 291(sp) +; VLEN512-NEXT: srli a0, t0, 16 +; VLEN512-NEXT: sb a0, 290(sp) +; VLEN512-NEXT: sb t0, 288(sp) +; VLEN512-NEXT: srli a0, t0, 8 +; VLEN512-NEXT: sb a0, 289(sp) +; VLEN512-NEXT: srli a0, a7, 56 +; VLEN512-NEXT: sb a0, 287(sp) +; VLEN512-NEXT: srli a0, a7, 48 +; VLEN512-NEXT: sb a0, 286(sp) +; VLEN512-NEXT: srli a0, a7, 40 +; VLEN512-NEXT: sb a0, 285(sp) +; VLEN512-NEXT: srli a0, a7, 32 +; VLEN512-NEXT: sb a0, 284(sp) +; VLEN512-NEXT: srli a0, a7, 24 +; VLEN512-NEXT: sb a0, 283(sp) +; VLEN512-NEXT: srli a0, a7, 16 +; VLEN512-NEXT: sb a0, 282(sp) +; VLEN512-NEXT: vmv.x.s a0, v0 +; VLEN512-NEXT: sb a7, 280(sp) +; VLEN512-NEXT: srli a1, a7, 8 +; VLEN512-NEXT: sb a1, 281(sp) +; VLEN512-NEXT: srli a1, a0, 56 +; VLEN512-NEXT: sb a1, 279(sp) +; VLEN512-NEXT: srli a1, a0, 48 +; VLEN512-NEXT: sb a1, 278(sp) +; VLEN512-NEXT: srli a1, a0, 40 +; VLEN512-NEXT: sb a1, 277(sp) +; VLEN512-NEXT: srli a1, a0, 32 +; VLEN512-NEXT: sb a1, 276(sp) +; VLEN512-NEXT: srli a1, a0, 24 +; VLEN512-NEXT: sb a1, 275(sp) +; VLEN512-NEXT: srli a1, a0, 16 +; VLEN512-NEXT: sb a1, 274(sp) +; VLEN512-NEXT: addi a1, sp, 920 +; VLEN512-NEXT: vl8re8.v v24, (a1) # Unknown-size Folded Reload +; VLEN512-NEXT: vmv.x.s a1, v24 +; VLEN512-NEXT: sb a0, 272(sp) +; VLEN512-NEXT: srli a0, a0, 8 +; VLEN512-NEXT: sb a0, 273(sp) +; VLEN512-NEXT: srli a0, a1, 56 +; VLEN512-NEXT: sb a0, 271(sp) +; VLEN512-NEXT: srli a0, a1, 48 +; VLEN512-NEXT: sb a0, 270(sp) +; VLEN512-NEXT: srli a0, a1, 40 +; VLEN512-NEXT: sb a0, 269(sp) +; VLEN512-NEXT: srli a0, a1, 32 +; VLEN512-NEXT: sb a0, 268(sp) +; VLEN512-NEXT: srli a0, a1, 24 +; VLEN512-NEXT: sb a0, 267(sp) +; VLEN512-NEXT: srli a0, a1, 16 +; VLEN512-NEXT: sb a0, 266(sp) +; VLEN512-NEXT: srli a0, a6, 16 +; VLEN512-NEXT: sb a1, 264(sp) +; VLEN512-NEXT: srli a1, a1, 8 +; VLEN512-NEXT: sb a1, 265(sp) +; VLEN512-NEXT: srli a1, a6, 56 +; VLEN512-NEXT: sb a1, 263(sp) +; VLEN512-NEXT: srli a1, a6, 48 +; VLEN512-NEXT: sb a1, 262(sp) +; VLEN512-NEXT: srli a1, a6, 40 +; VLEN512-NEXT: sb a1, 261(sp) +; VLEN512-NEXT: srli a1, a6, 32 +; VLEN512-NEXT: sb a1, 260(sp) +; VLEN512-NEXT: srli a1, a6, 24 +; VLEN512-NEXT: sb a1, 259(sp) +; VLEN512-NEXT: vmv.x.s a1, v8 +; VLEN512-NEXT: sb a0, 258(sp) +; VLEN512-NEXT: srli a0, a1, 56 +; VLEN512-NEXT: sb a6, 256(sp) +; VLEN512-NEXT: srli a2, a6, 8 +; VLEN512-NEXT: sb a2, 257(sp) +; VLEN512-NEXT: vslidedown.vi v24, v8, 31 +; VLEN512-NEXT: sb a0, 519(sp) +; VLEN512-NEXT: srli a0, a1, 48 +; VLEN512-NEXT: sb a0, 518(sp) +; VLEN512-NEXT: srli a0, a1, 40 +; VLEN512-NEXT: sb a0, 517(sp) +; VLEN512-NEXT: srli a0, a1, 32 +; VLEN512-NEXT: sb a0, 516(sp) +; VLEN512-NEXT: srli a0, a1, 24 +; VLEN512-NEXT: sb a0, 515(sp) +; VLEN512-NEXT: srli a0, a1, 16 +; VLEN512-NEXT: sb a0, 514(sp) +; VLEN512-NEXT: vmv.x.s a0, v24 +; VLEN512-NEXT: vslidedown.vi v24, v8, 30 +; VLEN512-NEXT: sb a1, 512(sp) +; VLEN512-NEXT: srli a1, a1, 8 +; VLEN512-NEXT: sb a1, 513(sp) +; VLEN512-NEXT: srli a1, a0, 56 +; VLEN512-NEXT: sb a1, 767(sp) +; VLEN512-NEXT: srli a1, a0, 48 +; VLEN512-NEXT: sb a1, 766(sp) +; VLEN512-NEXT: srli a1, a0, 40 +; VLEN512-NEXT: sb a1, 765(sp) +; VLEN512-NEXT: srli a1, a0, 32 +; VLEN512-NEXT: sb a1, 764(sp) +; VLEN512-NEXT: srli a1, a0, 24 +; VLEN512-NEXT: sb a1, 763(sp) +; VLEN512-NEXT: srli a1, a0, 16 +; VLEN512-NEXT: sb a1, 762(sp) +; VLEN512-NEXT: vmv.x.s a1, v24 +; VLEN512-NEXT: vslidedown.vi v24, v8, 29 +; VLEN512-NEXT: sb a0, 760(sp) +; VLEN512-NEXT: srli a0, a0, 8 +; VLEN512-NEXT: sb a0, 761(sp) +; VLEN512-NEXT: srli a0, a1, 56 +; VLEN512-NEXT: sb a0, 759(sp) +; VLEN512-NEXT: srli a0, a1, 48 +; VLEN512-NEXT: sb a0, 758(sp) +; VLEN512-NEXT: srli a0, a1, 40 +; VLEN512-NEXT: sb a0, 757(sp) +; VLEN512-NEXT: srli a0, a1, 32 +; VLEN512-NEXT: sb a0, 756(sp) +; VLEN512-NEXT: srli a0, a1, 24 +; VLEN512-NEXT: sb a0, 755(sp) +; VLEN512-NEXT: srli a0, a1, 16 +; VLEN512-NEXT: sb a0, 754(sp) +; VLEN512-NEXT: vmv.x.s a0, v24 +; VLEN512-NEXT: vslidedown.vi v24, v8, 28 +; VLEN512-NEXT: sb a1, 752(sp) +; VLEN512-NEXT: srli a1, a1, 8 +; VLEN512-NEXT: sb a1, 753(sp) +; VLEN512-NEXT: srli a1, a0, 56 +; VLEN512-NEXT: sb a1, 751(sp) +; VLEN512-NEXT: srli a1, a0, 48 +; VLEN512-NEXT: sb a1, 750(sp) +; VLEN512-NEXT: srli a1, a0, 40 +; VLEN512-NEXT: sb a1, 749(sp) +; VLEN512-NEXT: srli a1, a0, 32 +; VLEN512-NEXT: sb a1, 748(sp) +; VLEN512-NEXT: srli a1, a0, 24 +; VLEN512-NEXT: sb a1, 747(sp) +; VLEN512-NEXT: srli a1, a0, 16 +; VLEN512-NEXT: sb a1, 746(sp) +; VLEN512-NEXT: vmv.x.s a1, v24 +; VLEN512-NEXT: vslidedown.vi v24, v8, 27 +; VLEN512-NEXT: sb a0, 744(sp) +; VLEN512-NEXT: srli a0, a0, 8 +; VLEN512-NEXT: sb a0, 745(sp) +; VLEN512-NEXT: srli a0, a1, 56 +; VLEN512-NEXT: sb a0, 743(sp) +; VLEN512-NEXT: srli a0, a1, 48 +; VLEN512-NEXT: sb a0, 742(sp) +; VLEN512-NEXT: srli a0, a1, 40 +; VLEN512-NEXT: sb a0, 741(sp) +; VLEN512-NEXT: srli a0, a1, 32 +; VLEN512-NEXT: sb a0, 740(sp) +; VLEN512-NEXT: srli a0, a1, 24 +; VLEN512-NEXT: sb a0, 739(sp) +; VLEN512-NEXT: srli a0, a1, 16 +; VLEN512-NEXT: sb a0, 738(sp) +; VLEN512-NEXT: vmv.x.s a0, v24 +; VLEN512-NEXT: vslidedown.vi v24, v8, 26 +; VLEN512-NEXT: sb a1, 736(sp) +; VLEN512-NEXT: srli a1, a1, 8 +; VLEN512-NEXT: sb a1, 737(sp) +; VLEN512-NEXT: srli a1, a0, 56 +; VLEN512-NEXT: sb a1, 735(sp) +; VLEN512-NEXT: srli a1, a0, 48 +; VLEN512-NEXT: sb a1, 734(sp) +; VLEN512-NEXT: srli a1, a0, 40 +; VLEN512-NEXT: sb a1, 733(sp) +; VLEN512-NEXT: srli a1, a0, 32 +; VLEN512-NEXT: sb a1, 732(sp) +; VLEN512-NEXT: srli a1, a0, 24 +; VLEN512-NEXT: sb a1, 731(sp) +; VLEN512-NEXT: srli a1, a0, 16 +; VLEN512-NEXT: sb a1, 730(sp) +; VLEN512-NEXT: vmv.x.s a1, v24 +; VLEN512-NEXT: vslidedown.vi v24, v8, 25 +; VLEN512-NEXT: sb a0, 728(sp) +; VLEN512-NEXT: srli a0, a0, 8 +; VLEN512-NEXT: sb a0, 729(sp) +; VLEN512-NEXT: srli a0, a1, 56 +; VLEN512-NEXT: sb a0, 727(sp) +; VLEN512-NEXT: srli a0, a1, 48 +; VLEN512-NEXT: sb a0, 726(sp) +; VLEN512-NEXT: srli a0, a1, 40 +; VLEN512-NEXT: sb a0, 725(sp) +; VLEN512-NEXT: srli a0, a1, 32 +; VLEN512-NEXT: sb a0, 724(sp) +; VLEN512-NEXT: srli a0, a1, 24 +; VLEN512-NEXT: sb a0, 723(sp) +; VLEN512-NEXT: srli a0, a1, 16 +; VLEN512-NEXT: sb a0, 722(sp) +; VLEN512-NEXT: vmv.x.s a0, v24 +; VLEN512-NEXT: vslidedown.vi v24, v8, 24 +; VLEN512-NEXT: sb a1, 720(sp) +; VLEN512-NEXT: srli a1, a1, 8 +; VLEN512-NEXT: sb a1, 721(sp) +; VLEN512-NEXT: srli a1, a0, 56 +; VLEN512-NEXT: sb a1, 719(sp) +; VLEN512-NEXT: srli a1, a0, 48 +; VLEN512-NEXT: sb a1, 718(sp) +; VLEN512-NEXT: srli a1, a0, 40 +; VLEN512-NEXT: sb a1, 717(sp) +; VLEN512-NEXT: srli a1, a0, 32 +; VLEN512-NEXT: sb a1, 716(sp) +; VLEN512-NEXT: srli a1, a0, 24 +; VLEN512-NEXT: sb a1, 715(sp) +; VLEN512-NEXT: srli a1, a0, 16 +; VLEN512-NEXT: sb a1, 714(sp) +; VLEN512-NEXT: vmv.x.s a1, v24 +; VLEN512-NEXT: vslidedown.vi v24, v8, 23 +; VLEN512-NEXT: sb a0, 712(sp) +; VLEN512-NEXT: srli a0, a0, 8 +; VLEN512-NEXT: sb a0, 713(sp) +; VLEN512-NEXT: srli a0, a1, 56 +; VLEN512-NEXT: sb a0, 711(sp) +; VLEN512-NEXT: srli a0, a1, 48 +; VLEN512-NEXT: sb a0, 710(sp) +; VLEN512-NEXT: srli a0, a1, 40 +; VLEN512-NEXT: sb a0, 709(sp) +; VLEN512-NEXT: srli a0, a1, 32 +; VLEN512-NEXT: sb a0, 708(sp) +; VLEN512-NEXT: srli a0, a1, 24 +; VLEN512-NEXT: sb a0, 707(sp) +; VLEN512-NEXT: srli a0, a1, 16 +; VLEN512-NEXT: sb a0, 706(sp) +; VLEN512-NEXT: vmv.x.s a0, v24 +; VLEN512-NEXT: vslidedown.vi v24, v8, 22 +; VLEN512-NEXT: sb a1, 704(sp) +; VLEN512-NEXT: srli a1, a1, 8 +; VLEN512-NEXT: sb a1, 705(sp) +; VLEN512-NEXT: srli a1, a0, 56 +; VLEN512-NEXT: sb a1, 703(sp) +; VLEN512-NEXT: srli a1, a0, 48 +; VLEN512-NEXT: sb a1, 702(sp) +; VLEN512-NEXT: srli a1, a0, 40 +; VLEN512-NEXT: sb a1, 701(sp) +; VLEN512-NEXT: srli a1, a0, 32 +; VLEN512-NEXT: sb a1, 700(sp) +; VLEN512-NEXT: srli a1, a0, 24 +; VLEN512-NEXT: sb a1, 699(sp) +; VLEN512-NEXT: srli a1, a0, 16 +; VLEN512-NEXT: sb a1, 698(sp) +; VLEN512-NEXT: vmv.x.s a1, v24 +; VLEN512-NEXT: vslidedown.vi v24, v8, 21 +; VLEN512-NEXT: sb a0, 696(sp) +; VLEN512-NEXT: srli a0, a0, 8 +; VLEN512-NEXT: sb a0, 697(sp) +; VLEN512-NEXT: srli a0, a1, 56 +; VLEN512-NEXT: sb a0, 695(sp) +; VLEN512-NEXT: srli a0, a1, 48 +; VLEN512-NEXT: sb a0, 694(sp) +; VLEN512-NEXT: srli a0, a1, 40 +; VLEN512-NEXT: sb a0, 693(sp) +; VLEN512-NEXT: srli a0, a1, 32 +; VLEN512-NEXT: sb a0, 692(sp) +; VLEN512-NEXT: srli a0, a1, 24 +; VLEN512-NEXT: sb a0, 691(sp) +; VLEN512-NEXT: srli a0, a1, 16 +; VLEN512-NEXT: sb a0, 690(sp) +; VLEN512-NEXT: vmv.x.s a0, v24 +; VLEN512-NEXT: vslidedown.vi v24, v8, 20 +; VLEN512-NEXT: sb a1, 688(sp) +; VLEN512-NEXT: srli a1, a1, 8 +; VLEN512-NEXT: sb a1, 689(sp) +; VLEN512-NEXT: srli a1, a0, 56 +; VLEN512-NEXT: sb a1, 687(sp) +; VLEN512-NEXT: srli a1, a0, 48 +; VLEN512-NEXT: sb a1, 686(sp) +; VLEN512-NEXT: srli a1, a0, 40 +; VLEN512-NEXT: sb a1, 685(sp) +; VLEN512-NEXT: srli a1, a0, 32 +; VLEN512-NEXT: sb a1, 684(sp) +; VLEN512-NEXT: srli a1, a0, 24 +; VLEN512-NEXT: sb a1, 683(sp) +; VLEN512-NEXT: srli a1, a0, 16 +; VLEN512-NEXT: sb a1, 682(sp) +; VLEN512-NEXT: vmv.x.s a1, v24 +; VLEN512-NEXT: vslidedown.vi v24, v8, 19 +; VLEN512-NEXT: sb a0, 680(sp) +; VLEN512-NEXT: srli a0, a0, 8 +; VLEN512-NEXT: sb a0, 681(sp) +; VLEN512-NEXT: srli a0, a1, 56 +; VLEN512-NEXT: sb a0, 679(sp) +; VLEN512-NEXT: srli a0, a1, 48 +; VLEN512-NEXT: sb a0, 678(sp) +; VLEN512-NEXT: srli a0, a1, 40 +; VLEN512-NEXT: sb a0, 677(sp) +; VLEN512-NEXT: srli a0, a1, 32 +; VLEN512-NEXT: sb a0, 676(sp) +; VLEN512-NEXT: srli a0, a1, 24 +; VLEN512-NEXT: sb a0, 675(sp) +; VLEN512-NEXT: srli a0, a1, 16 +; VLEN512-NEXT: sb a0, 674(sp) +; VLEN512-NEXT: vmv.x.s a0, v24 +; VLEN512-NEXT: vslidedown.vi v24, v8, 18 +; VLEN512-NEXT: sb a1, 672(sp) +; VLEN512-NEXT: srli a1, a1, 8 +; VLEN512-NEXT: sb a1, 673(sp) +; VLEN512-NEXT: srli a1, a0, 56 +; VLEN512-NEXT: sb a1, 671(sp) +; VLEN512-NEXT: srli a1, a0, 48 +; VLEN512-NEXT: sb a1, 670(sp) +; VLEN512-NEXT: srli a1, a0, 40 +; VLEN512-NEXT: sb a1, 669(sp) +; VLEN512-NEXT: srli a1, a0, 32 +; VLEN512-NEXT: sb a1, 668(sp) +; VLEN512-NEXT: srli a1, a0, 24 +; VLEN512-NEXT: sb a1, 667(sp) +; VLEN512-NEXT: srli a1, a0, 16 +; VLEN512-NEXT: sb a1, 666(sp) +; VLEN512-NEXT: vmv.x.s a1, v24 +; VLEN512-NEXT: vslidedown.vi v24, v8, 17 +; VLEN512-NEXT: sb a0, 664(sp) +; VLEN512-NEXT: srli a0, a0, 8 +; VLEN512-NEXT: sb a0, 665(sp) +; VLEN512-NEXT: srli a0, a1, 56 +; VLEN512-NEXT: sb a0, 663(sp) +; VLEN512-NEXT: srli a0, a1, 48 +; VLEN512-NEXT: sb a0, 662(sp) +; VLEN512-NEXT: srli a0, a1, 40 +; VLEN512-NEXT: sb a0, 661(sp) +; VLEN512-NEXT: srli a0, a1, 32 +; VLEN512-NEXT: sb a0, 660(sp) +; VLEN512-NEXT: srli a0, a1, 24 +; VLEN512-NEXT: sb a0, 659(sp) +; VLEN512-NEXT: srli a0, a1, 16 +; VLEN512-NEXT: sb a0, 658(sp) +; VLEN512-NEXT: vmv.x.s a0, v24 +; VLEN512-NEXT: vslidedown.vi v24, v8, 16 +; VLEN512-NEXT: sb a1, 656(sp) +; VLEN512-NEXT: srli a1, a1, 8 +; VLEN512-NEXT: sb a1, 657(sp) +; VLEN512-NEXT: srli a1, a0, 56 +; VLEN512-NEXT: sb a1, 655(sp) +; VLEN512-NEXT: srli a1, a0, 48 +; VLEN512-NEXT: sb a1, 654(sp) +; VLEN512-NEXT: srli a1, a0, 40 +; VLEN512-NEXT: sb a1, 653(sp) +; VLEN512-NEXT: srli a1, a0, 32 +; VLEN512-NEXT: sb a1, 652(sp) +; VLEN512-NEXT: srli a1, a0, 24 +; VLEN512-NEXT: sb a1, 651(sp) +; VLEN512-NEXT: srli a1, a0, 16 +; VLEN512-NEXT: sb a1, 650(sp) +; VLEN512-NEXT: vmv.x.s a1, v24 +; VLEN512-NEXT: vslidedown.vi v24, v8, 15 +; VLEN512-NEXT: sb a0, 648(sp) +; VLEN512-NEXT: srli a0, a0, 8 +; VLEN512-NEXT: sb a0, 649(sp) +; VLEN512-NEXT: srli a0, a1, 56 +; VLEN512-NEXT: sb a0, 647(sp) +; VLEN512-NEXT: srli a0, a1, 48 +; VLEN512-NEXT: sb a0, 646(sp) +; VLEN512-NEXT: srli a0, a1, 40 +; VLEN512-NEXT: sb a0, 645(sp) +; VLEN512-NEXT: srli a0, a1, 32 +; VLEN512-NEXT: sb a0, 644(sp) +; VLEN512-NEXT: srli a0, a1, 24 +; VLEN512-NEXT: sb a0, 643(sp) +; VLEN512-NEXT: srli a0, a1, 16 +; VLEN512-NEXT: sb a0, 642(sp) +; VLEN512-NEXT: vmv.x.s a0, v24 +; VLEN512-NEXT: vslidedown.vi v24, v8, 14 +; VLEN512-NEXT: sb a1, 640(sp) +; VLEN512-NEXT: srli a1, a1, 8 +; VLEN512-NEXT: sb a1, 641(sp) +; VLEN512-NEXT: srli a1, a0, 56 +; VLEN512-NEXT: sb a1, 639(sp) +; VLEN512-NEXT: srli a1, a0, 48 +; VLEN512-NEXT: sb a1, 638(sp) +; VLEN512-NEXT: srli a1, a0, 40 +; VLEN512-NEXT: sb a1, 637(sp) +; VLEN512-NEXT: srli a1, a0, 32 +; VLEN512-NEXT: sb a1, 636(sp) +; VLEN512-NEXT: srli a1, a0, 24 +; VLEN512-NEXT: sb a1, 635(sp) +; VLEN512-NEXT: srli a1, a0, 16 +; VLEN512-NEXT: sb a1, 634(sp) +; VLEN512-NEXT: vmv.x.s a1, v24 +; VLEN512-NEXT: vslidedown.vi v24, v8, 13 +; VLEN512-NEXT: sb a0, 632(sp) +; VLEN512-NEXT: srli a0, a0, 8 +; VLEN512-NEXT: sb a0, 633(sp) +; VLEN512-NEXT: srli a0, a1, 56 +; VLEN512-NEXT: sb a0, 631(sp) +; VLEN512-NEXT: srli a0, a1, 48 +; VLEN512-NEXT: sb a0, 630(sp) +; VLEN512-NEXT: srli a0, a1, 40 +; VLEN512-NEXT: sb a0, 629(sp) +; VLEN512-NEXT: srli a0, a1, 32 +; VLEN512-NEXT: sb a0, 628(sp) +; VLEN512-NEXT: srli a0, a1, 24 +; VLEN512-NEXT: sb a0, 627(sp) +; VLEN512-NEXT: srli a0, a1, 16 +; VLEN512-NEXT: sb a0, 626(sp) +; VLEN512-NEXT: vmv.x.s a0, v24 +; VLEN512-NEXT: vslidedown.vi v24, v8, 12 +; VLEN512-NEXT: sb a1, 624(sp) +; VLEN512-NEXT: srli a1, a1, 8 +; VLEN512-NEXT: sb a1, 625(sp) +; VLEN512-NEXT: srli a1, a0, 56 +; VLEN512-NEXT: sb a1, 623(sp) +; VLEN512-NEXT: srli a1, a0, 48 +; VLEN512-NEXT: sb a1, 622(sp) +; VLEN512-NEXT: srli a1, a0, 40 +; VLEN512-NEXT: sb a1, 621(sp) +; VLEN512-NEXT: srli a1, a0, 32 +; VLEN512-NEXT: sb a1, 620(sp) +; VLEN512-NEXT: srli a1, a0, 24 +; VLEN512-NEXT: sb a1, 619(sp) +; VLEN512-NEXT: srli a1, a0, 16 +; VLEN512-NEXT: sb a1, 618(sp) +; VLEN512-NEXT: vmv.x.s a1, v24 +; VLEN512-NEXT: vslidedown.vi v24, v8, 11 +; VLEN512-NEXT: sb a0, 616(sp) +; VLEN512-NEXT: srli a0, a0, 8 +; VLEN512-NEXT: sb a0, 617(sp) +; VLEN512-NEXT: srli a0, a1, 56 +; VLEN512-NEXT: sb a0, 615(sp) +; VLEN512-NEXT: srli a0, a1, 48 +; VLEN512-NEXT: sb a0, 614(sp) +; VLEN512-NEXT: srli a0, a1, 40 +; VLEN512-NEXT: sb a0, 613(sp) +; VLEN512-NEXT: srli a0, a1, 32 +; VLEN512-NEXT: sb a0, 612(sp) +; VLEN512-NEXT: srli a0, a1, 24 +; VLEN512-NEXT: sb a0, 611(sp) +; VLEN512-NEXT: srli a0, a1, 16 +; VLEN512-NEXT: sb a0, 610(sp) +; VLEN512-NEXT: vmv.x.s a0, v24 +; VLEN512-NEXT: vslidedown.vi v24, v8, 10 +; VLEN512-NEXT: sb a1, 608(sp) +; VLEN512-NEXT: srli a1, a1, 8 +; VLEN512-NEXT: sb a1, 609(sp) +; VLEN512-NEXT: srli a1, a0, 56 +; VLEN512-NEXT: sb a1, 607(sp) +; VLEN512-NEXT: srli a1, a0, 48 +; VLEN512-NEXT: sb a1, 606(sp) +; VLEN512-NEXT: srli a1, a0, 40 +; VLEN512-NEXT: sb a1, 605(sp) +; VLEN512-NEXT: srli a1, a0, 32 +; VLEN512-NEXT: sb a1, 604(sp) +; VLEN512-NEXT: srli a1, a0, 24 +; VLEN512-NEXT: sb a1, 603(sp) +; VLEN512-NEXT: srli a1, a0, 16 +; VLEN512-NEXT: sb a1, 602(sp) +; VLEN512-NEXT: vmv.x.s a1, v24 +; VLEN512-NEXT: vslidedown.vi v24, v8, 9 +; VLEN512-NEXT: sb a0, 600(sp) +; VLEN512-NEXT: srli a0, a0, 8 +; VLEN512-NEXT: sb a0, 601(sp) +; VLEN512-NEXT: srli a0, a1, 56 +; VLEN512-NEXT: sb a0, 599(sp) +; VLEN512-NEXT: srli a0, a1, 48 +; VLEN512-NEXT: sb a0, 598(sp) +; VLEN512-NEXT: srli a0, a1, 40 +; VLEN512-NEXT: sb a0, 597(sp) +; VLEN512-NEXT: srli a0, a1, 32 +; VLEN512-NEXT: sb a0, 596(sp) +; VLEN512-NEXT: srli a0, a1, 24 +; VLEN512-NEXT: sb a0, 595(sp) +; VLEN512-NEXT: srli a0, a1, 16 +; VLEN512-NEXT: sb a0, 594(sp) +; VLEN512-NEXT: vmv.x.s a0, v24 +; VLEN512-NEXT: vslidedown.vi v24, v8, 8 +; VLEN512-NEXT: sb a1, 592(sp) +; VLEN512-NEXT: srli a1, a1, 8 +; VLEN512-NEXT: sb a1, 593(sp) +; VLEN512-NEXT: srli a1, a0, 56 +; VLEN512-NEXT: sb a1, 591(sp) +; VLEN512-NEXT: srli a1, a0, 48 +; VLEN512-NEXT: sb a1, 590(sp) +; VLEN512-NEXT: srli a1, a0, 40 +; VLEN512-NEXT: sb a1, 589(sp) +; VLEN512-NEXT: srli a1, a0, 32 +; VLEN512-NEXT: sb a1, 588(sp) +; VLEN512-NEXT: srli a1, a0, 24 +; VLEN512-NEXT: sb a1, 587(sp) +; VLEN512-NEXT: srli a1, a0, 16 +; VLEN512-NEXT: sb a1, 586(sp) +; VLEN512-NEXT: vmv.x.s a1, v24 +; VLEN512-NEXT: vslidedown.vi v24, v8, 7 +; VLEN512-NEXT: sb a0, 584(sp) +; VLEN512-NEXT: srli a0, a0, 8 +; VLEN512-NEXT: sb a0, 585(sp) +; VLEN512-NEXT: srli a0, a1, 56 +; VLEN512-NEXT: sb a0, 583(sp) +; VLEN512-NEXT: srli a0, a1, 48 +; VLEN512-NEXT: sb a0, 582(sp) +; VLEN512-NEXT: srli a0, a1, 40 +; VLEN512-NEXT: sb a0, 581(sp) +; VLEN512-NEXT: srli a0, a1, 32 +; VLEN512-NEXT: sb a0, 580(sp) +; VLEN512-NEXT: srli a0, a1, 24 +; VLEN512-NEXT: sb a0, 579(sp) +; VLEN512-NEXT: srli a0, a1, 16 +; VLEN512-NEXT: sb a0, 578(sp) +; VLEN512-NEXT: vmv.x.s a0, v24 +; VLEN512-NEXT: vslidedown.vi v24, v8, 6 +; VLEN512-NEXT: sb a1, 576(sp) +; VLEN512-NEXT: srli a1, a1, 8 +; VLEN512-NEXT: sb a1, 577(sp) +; VLEN512-NEXT: srli a1, a0, 56 +; VLEN512-NEXT: sb a1, 575(sp) +; VLEN512-NEXT: srli a1, a0, 48 +; VLEN512-NEXT: sb a1, 574(sp) +; VLEN512-NEXT: srli a1, a0, 40 +; VLEN512-NEXT: sb a1, 573(sp) +; VLEN512-NEXT: srli a1, a0, 32 +; VLEN512-NEXT: sb a1, 572(sp) +; VLEN512-NEXT: srli a1, a0, 24 +; VLEN512-NEXT: sb a1, 571(sp) +; VLEN512-NEXT: srli a1, a0, 16 +; VLEN512-NEXT: sb a1, 570(sp) +; VLEN512-NEXT: vmv.x.s a1, v24 +; VLEN512-NEXT: vslidedown.vi v24, v8, 5 +; VLEN512-NEXT: sb a0, 568(sp) +; VLEN512-NEXT: srli a0, a0, 8 +; VLEN512-NEXT: sb a0, 569(sp) +; VLEN512-NEXT: srli a0, a1, 56 +; VLEN512-NEXT: sb a0, 567(sp) +; VLEN512-NEXT: srli a0, a1, 48 +; VLEN512-NEXT: sb a0, 566(sp) +; VLEN512-NEXT: srli a0, a1, 40 +; VLEN512-NEXT: sb a0, 565(sp) +; VLEN512-NEXT: srli a0, a1, 32 +; VLEN512-NEXT: sb a0, 564(sp) +; VLEN512-NEXT: srli a0, a1, 24 +; VLEN512-NEXT: sb a0, 563(sp) +; VLEN512-NEXT: srli a0, a1, 16 +; VLEN512-NEXT: sb a0, 562(sp) +; VLEN512-NEXT: vmv.x.s a0, v24 +; VLEN512-NEXT: vslidedown.vi v24, v8, 4 +; VLEN512-NEXT: sb a1, 560(sp) +; VLEN512-NEXT: srli a1, a1, 8 +; VLEN512-NEXT: sb a1, 561(sp) +; VLEN512-NEXT: srli a1, a0, 56 +; VLEN512-NEXT: sb a1, 559(sp) +; VLEN512-NEXT: srli a1, a0, 48 +; VLEN512-NEXT: sb a1, 558(sp) +; VLEN512-NEXT: srli a1, a0, 40 +; VLEN512-NEXT: sb a1, 557(sp) +; VLEN512-NEXT: srli a1, a0, 32 +; VLEN512-NEXT: sb a1, 556(sp) +; VLEN512-NEXT: srli a1, a0, 24 +; VLEN512-NEXT: sb a1, 555(sp) +; VLEN512-NEXT: srli a1, a0, 16 +; VLEN512-NEXT: sb a1, 554(sp) +; VLEN512-NEXT: vmv.x.s a1, v24 +; VLEN512-NEXT: vslidedown.vi v24, v8, 3 +; VLEN512-NEXT: sb a0, 552(sp) +; VLEN512-NEXT: srli a0, a0, 8 +; VLEN512-NEXT: sb a0, 553(sp) +; VLEN512-NEXT: srli a0, a1, 56 +; VLEN512-NEXT: sb a0, 551(sp) +; VLEN512-NEXT: srli a0, a1, 48 +; VLEN512-NEXT: sb a0, 550(sp) +; VLEN512-NEXT: srli a0, a1, 40 +; VLEN512-NEXT: sb a0, 549(sp) +; VLEN512-NEXT: srli a0, a1, 32 +; VLEN512-NEXT: sb a0, 548(sp) +; VLEN512-NEXT: srli a0, a1, 24 +; VLEN512-NEXT: sb a0, 547(sp) +; VLEN512-NEXT: srli a0, a1, 16 +; VLEN512-NEXT: sb a0, 546(sp) +; VLEN512-NEXT: vmv.x.s a0, v24 +; VLEN512-NEXT: vslidedown.vi v24, v8, 1 +; VLEN512-NEXT: vslidedown.vi v8, v8, 2 +; VLEN512-NEXT: sb a1, 544(sp) +; VLEN512-NEXT: srli a1, a1, 8 +; VLEN512-NEXT: sb a1, 545(sp) +; VLEN512-NEXT: srli a1, a0, 56 +; VLEN512-NEXT: sb a1, 543(sp) +; VLEN512-NEXT: srli a1, a0, 48 +; VLEN512-NEXT: sb a1, 542(sp) +; VLEN512-NEXT: srli a1, a0, 40 +; VLEN512-NEXT: sb a1, 541(sp) +; VLEN512-NEXT: srli a1, a0, 32 +; VLEN512-NEXT: sb a1, 540(sp) +; VLEN512-NEXT: srli a1, a0, 24 +; VLEN512-NEXT: sb a1, 539(sp) +; VLEN512-NEXT: srli a1, a0, 16 +; VLEN512-NEXT: sb a1, 538(sp) +; VLEN512-NEXT: vmv.x.s a1, v8 +; VLEN512-NEXT: sb a0, 536(sp) +; VLEN512-NEXT: srli a0, a0, 8 +; VLEN512-NEXT: sb a0, 537(sp) +; VLEN512-NEXT: srli a0, a1, 56 +; VLEN512-NEXT: sb a0, 535(sp) +; VLEN512-NEXT: srli a0, a1, 48 +; VLEN512-NEXT: sb a0, 534(sp) +; VLEN512-NEXT: srli a0, a1, 40 +; VLEN512-NEXT: sb a0, 533(sp) +; VLEN512-NEXT: srli a0, a1, 32 +; VLEN512-NEXT: sb a0, 532(sp) +; VLEN512-NEXT: srli a0, a1, 24 +; VLEN512-NEXT: sb a0, 531(sp) +; VLEN512-NEXT: srli a0, a1, 16 +; VLEN512-NEXT: sb a0, 530(sp) +; VLEN512-NEXT: vmv.x.s a0, v24 +; VLEN512-NEXT: sb a1, 528(sp) +; VLEN512-NEXT: srli a1, a1, 8 +; VLEN512-NEXT: sb a1, 529(sp) +; VLEN512-NEXT: srli a1, a0, 56 +; VLEN512-NEXT: sb a1, 527(sp) +; VLEN512-NEXT: srli a1, a0, 48 +; VLEN512-NEXT: sb a1, 526(sp) +; VLEN512-NEXT: srli a1, a0, 40 +; VLEN512-NEXT: sb a1, 525(sp) +; VLEN512-NEXT: srli a1, a0, 32 +; VLEN512-NEXT: sb a1, 524(sp) +; VLEN512-NEXT: srli a1, a0, 24 +; VLEN512-NEXT: sb a1, 523(sp) +; VLEN512-NEXT: srli a1, a0, 16 +; VLEN512-NEXT: sb a1, 522(sp) +; VLEN512-NEXT: sb a0, 520(sp) +; VLEN512-NEXT: srli a0, a0, 8 +; VLEN512-NEXT: sb a0, 521(sp) +; VLEN512-NEXT: addi a0, zero, 256 +; VLEN512-NEXT: vsetvli zero, a0, e8,m4,ta,mu +; VLEN512-NEXT: addi a0, sp, 512 +; VLEN512-NEXT: vle8.v v28, (a0) +; VLEN512-NEXT: addi a0, sp, 256 +; VLEN512-NEXT: vle8.v v12, (a0) +; VLEN512-NEXT: vadd.vv v8, v16, v28 +; VLEN512-NEXT: vadd.vv v12, v20, v12 +; VLEN512-NEXT: addi sp, s0, -1024 +; VLEN512-NEXT: ld s11, 920(sp) # 8-byte Folded Reload +; VLEN512-NEXT: ld s10, 928(sp) # 8-byte Folded Reload +; VLEN512-NEXT: ld s9, 936(sp) # 8-byte Folded Reload +; VLEN512-NEXT: ld s8, 944(sp) # 8-byte Folded Reload +; VLEN512-NEXT: ld s7, 952(sp) # 8-byte Folded Reload +; VLEN512-NEXT: ld s6, 960(sp) # 8-byte Folded Reload +; VLEN512-NEXT: ld s5, 968(sp) # 8-byte Folded Reload +; VLEN512-NEXT: ld s4, 976(sp) # 8-byte Folded Reload +; VLEN512-NEXT: ld s3, 984(sp) # 8-byte Folded Reload +; VLEN512-NEXT: ld s2, 992(sp) # 8-byte Folded Reload +; VLEN512-NEXT: ld s1, 1000(sp) # 8-byte Folded Reload +; VLEN512-NEXT: ld s0, 1008(sp) # 8-byte Folded Reload +; VLEN512-NEXT: ld ra, 1016(sp) # 8-byte Folded Reload +; VLEN512-NEXT: addi sp, sp, 1024 +; VLEN512-NEXT: ret + %c = bitcast <256 x i16> %a to <512 x i8> + %v = add <512 x i8> %b, %c + ret <512 x i8> %v +}