[AMDGPU] Regenerate global-load-saddr-to-vaddr test checks

To simplify the diff in a future patch.
2025-01-31 20:51:52 +01:00 · 2021-07-25 13:27:30 +01:00 · 2021-07-25 13:27:30 +01:00 · fcfcb73f87
commit fcfcb73f87
parent 2561fe6c14
1 changed files with 45 additions and 7 deletions
--- a/test/CodeGen/AMDGPU/global-load-saddr-to-vaddr.ll
+++ b/test/CodeGen/AMDGPU/global-load-saddr-to-vaddr.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s

 ; The first load produces address in a VGPR which is used in address calculation
@@ -9,11 +10,30 @@
 ; Check that we are changing SADDR form of a load to VADDR and do not have to use
 ; readfirstlane instructions to move address from VGPRs into SGPRs.

-; GCN-LABEL: {{^}}test_move_load_address_to_vgpr:
-; GCN: BB{{[0-9]+}}_1:
-; GCN-NOT: v_readfirstlane_b32
-; GCN: global_load_dword v{{[0-9]+}}, v[{{[0-9:]+}}], off glc
 define amdgpu_kernel void @test_move_load_address_to_vgpr(i32 addrspace(1)* nocapture %arg) {
+; GCN-LABEL: test_move_load_address_to_vgpr:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GCN-NEXT:    v_mov_b32_e32 v2, 0
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    global_load_dword v1, v2, s[0:1] glc
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    v_mov_b32_e32 v3, s1
+; GCN-NEXT:    v_add_u32_e32 v0, 0xffffff00, v1
+; GCN-NEXT:    v_lshlrev_b64 v[1:2], 2, v[1:2]
+; GCN-NEXT:    v_add_co_u32_e32 v1, vcc, s0, v1
+; GCN-NEXT:    v_addc_co_u32_e32 v2, vcc, v3, v2, vcc
+; GCN-NEXT:  BB0_1: ; %bb3
+; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GCN-NEXT:    global_load_dword v3, v[1:2], off glc
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    v_add_co_u32_e32 v0, vcc, 1, v0
+; GCN-NEXT:    v_add_co_u32_e64 v1, s[0:1], 4, v1
+; GCN-NEXT:    v_addc_co_u32_e64 v2, s[0:1], 0, v2, s[0:1]
+; GCN-NEXT:    s_and_b64 vcc, exec, vcc
+; GCN-NEXT:    s_cbranch_vccz BB0_1
+; GCN-NEXT:  ; %bb.2: ; %bb2
+; GCN-NEXT:    s_endpgm
 bb:
  %i1 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 0
  %i2 = load volatile i32, i32 addrspace(1)* %i1, align 4
@@ -32,10 +52,28 @@ bb3:                                              ; preds = %bb3, %bb
  br i1 %i9, label %bb2, label %bb3
 }

-; GCN-LABEL: {{^}}test_move_load_address_to_vgpr_d16_hi:
-; GCN-NOT: v_readfirstlane_b32
-; GCN: global_load_short_d16_hi v{{[0-9]+}}, v[{{[0-9:]+}}], off glc
 define amdgpu_kernel void @test_move_load_address_to_vgpr_d16_hi(i16 addrspace(1)* nocapture %arg) {
+; GCN-LABEL: test_move_load_address_to_vgpr_d16_hi:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GCN-NEXT:    v_mov_b32_e32 v1, 0
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    global_load_ushort v0, v1, s[0:1] glc
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:  BB1_1: ; %bb3
+; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    v_lshlrev_b64 v[2:3], 1, v[0:1]
+; GCN-NEXT:    v_mov_b32_e32 v0, s1
+; GCN-NEXT:    v_add_co_u32_e32 v2, vcc, s0, v2
+; GCN-NEXT:    v_addc_co_u32_e32 v3, vcc, v0, v3, vcc
+; GCN-NEXT:    global_load_short_d16_hi v0, v[2:3], off glc
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 0x100, v0
+; GCN-NEXT:    s_and_b64 vcc, exec, vcc
+; GCN-NEXT:    s_cbranch_vccz BB1_1
+; GCN-NEXT:  ; %bb.2: ; %bb2
+; GCN-NEXT:    s_endpgm
 bb:
  %i1 = getelementptr inbounds i16, i16 addrspace(1)* %arg, i64 0
  %load.pre = load volatile i16, i16 addrspace(1)* %i1, align 4