diff --git a/test/CodeGen/AMDGPU/fmax3.ll b/test/CodeGen/AMDGPU/fmax3.ll index 39455acad48..1f67ace72df 100644 --- a/test/CodeGen/AMDGPU/fmax3.ll +++ b/test/CodeGen/AMDGPU/fmax3.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s ; GCN-LABEL: {{^}}test_fmax3_olt_0_f32: ; GCN: buffer_load_dword [[REGC:v[0-9]+]] @@ -38,20 +38,23 @@ define amdgpu_kernel void @test_fmax3_olt_1_f32(float addrspace(1)* %out, float } ; GCN-LABEL: {{^}}test_fmax3_olt_0_f16: -; GCN: buffer_load_ushort [[REGC:v[0-9]+]] -; GCN: buffer_load_ushort [[REGB:v[0-9]+]] ; GCN: buffer_load_ushort [[REGA:v[0-9]+]] +; GCN: buffer_load_ushort [[REGB:v[0-9]+]] +; GCN: buffer_load_ushort [[REGC:v[0-9]+]] -; SI: v_max3_f32 [[RESULT_F32:v[0-9]+]], -; SI: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[RESULT]] +; SI-DAG: v_cvt_f32_f16_e32 [[CVT_A:v[0-9]+]], [[REGA]] +; SI-DAG: v_cvt_f32_f16_e32 [[CVT_B:v[0-9]+]], [[REGB]] +; SI-DAG: v_cvt_f32_f16_e32 [[CVT_C:v[0-9]+]], [[REGC]] +; SI: v_max3_f32 [[RESULT_F32:v[0-9]+]], [[CVT_A]], [[CVT_B]], [[CVT_C]] +; SI: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[RESULT_F32]] ; VI: v_max_f16_e32 ; VI: v_max_f16_e32 [[RESULT:v[0-9]+]], -; GFX9: v_max3_f16 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]] +; GFX9: v_max3_f16 [[RESULT:v[0-9]+]], [[REGA]], [[REGB]], [[REGC]] ; GCN: buffer_store_short [[RESULT]], define amdgpu_kernel void @test_fmax3_olt_0_f16(half addrspace(1)* %out, half addrspace(1)* %aptr, half addrspace(1)* %bptr, half addrspace(1)* %cptr) #0 { - %a = load volatile half, half addrspace(1)* %aptr, align 2 + %a = load volatile half, half addrspace(1)* %aptr, align 2 %b = load volatile half, half addrspace(1)* %bptr, align 2 %c = load volatile half, half addrspace(1)* %cptr, align 2 %f0 = call half @llvm.maxnum.f16(half %a, half %b) @@ -62,17 +65,20 @@ define amdgpu_kernel void @test_fmax3_olt_0_f16(half addrspace(1)* %out, half ad ; Commute operand of second fmax ; GCN-LABEL: {{^}}test_fmax3_olt_1_f16: -; GCN: buffer_load_ushort [[REGB:v[0-9]+]] ; GCN: buffer_load_ushort [[REGA:v[0-9]+]] +; GCN: buffer_load_ushort [[REGB:v[0-9]+]] ; GCN: buffer_load_ushort [[REGC:v[0-9]+]] -; SI: v_max3_f32 [[RESULT_F32:v[0-9]+]], -; SI: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[RESULT]] +; SI-DAG: v_cvt_f32_f16_e32 [[CVT_A:v[0-9]+]], [[REGA]] +; SI-DAG: v_cvt_f32_f16_e32 [[CVT_B:v[0-9]+]], [[REGB]] +; SI-DAG: v_cvt_f32_f16_e32 [[CVT_C:v[0-9]+]], [[REGC]] +; SI: v_max3_f32 [[RESULT_F32:v[0-9]+]], [[CVT_C]], [[CVT_A]], [[CVT_B]] +; SI: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[RESULT_F32]] ; VI: v_max_f16_e32 ; VI: v_max_f16_e32 [[RESULT:v[0-9]+]], -; GFX9: v_max3_f16 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]] +; GFX9: v_max3_f16 [[RESULT:v[0-9]+]], [[REGC]], [[REGA]], [[REGB]] ; GCN: buffer_store_short [[RESULT]], define amdgpu_kernel void @test_fmax3_olt_1_f16(half addrspace(1)* %out, half addrspace(1)* %aptr, half addrspace(1)* %bptr, half addrspace(1)* %cptr) #0 { %a = load volatile half, half addrspace(1)* %aptr, align 2 diff --git a/test/CodeGen/AMDGPU/fmin3.ll b/test/CodeGen/AMDGPU/fmin3.ll index 06befaa64b5..fa93fbcfb91 100644 --- a/test/CodeGen/AMDGPU/fmin3.ll +++ b/test/CodeGen/AMDGPU/fmin3.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s ; GCN-LABEL: {{^}}test_fmin3_olt_0_f32: ; GCN: buffer_load_dword [[REGC:v[0-9]+]] @@ -60,17 +60,20 @@ define amdgpu_kernel void @test_fmin3_olt_0_f16(half addrspace(1)* %out, half ad ; Commute operand of second fmin ; GCN-LABEL: {{^}}test_fmin3_olt_1_f16: -; GCN: buffer_load_ushort [[REGB:v[0-9]+]] ; GCN: buffer_load_ushort [[REGA:v[0-9]+]] +; GCN: buffer_load_ushort [[REGB:v[0-9]+]] ; GCN: buffer_load_ushort [[REGC:v[0-9]+]] -; SI: v_min3_f32 [[RESULT_F32:v[0-9]+]], -; SI: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[RESULT]] +; SI-DAG: v_cvt_f32_f16_e32 [[CVT_A:v[0-9]+]], [[REGA]] +; SI-DAG: v_cvt_f32_f16_e32 [[CVT_B:v[0-9]+]], [[REGB]] +; SI-DAG: v_cvt_f32_f16_e32 [[CVT_C:v[0-9]+]], [[REGC]] +; SI: v_min3_f32 [[RESULT_F32:v[0-9]+]], [[CVT_C]], [[CVT_A]], [[CVT_B]] +; SI: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[RESULT_F32]] ; VI: v_min_f16_e32 ; VI: v_min_f16_e32 [[RESULT:v[0-9]+]], -; GFX9: v_min3_f16 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]] +; GFX9: v_min3_f16 [[RESULT:v[0-9]+]], [[REGC]], [[REGA]], [[REGB]] ; GCN: buffer_store_short [[RESULT]], define amdgpu_kernel void @test_fmin3_olt_1_f16(half addrspace(1)* %out, half addrspace(1)* %aptr, half addrspace(1)* %bptr, half addrspace(1)* %cptr) #0 { %a = load volatile half, half addrspace(1)* %aptr, align 2 diff --git a/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll b/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll index 4d08651dcb4..e14d4019c18 100644 --- a/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll +++ b/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll @@ -1,6 +1,6 @@ -; XUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tonga -mattr=+fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=VI-DENORM %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tonga -mattr=-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=VI-FLUSH %s +; XUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tonga -mattr=+fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,VI-DENORM %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tonga -mattr=-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,VI-FLUSH %s ; Make sure (fmul (fadd x, x), c) -> (fmul x, (fmul 2.0, c)) doesn't