AMDGPU: Remove superfluous string attributes from tests

Also fix v_mac.ll not testing right thing for fneg llvm-svn: 275129
2025-01-31 20:51:52 +01:00 · 2016-07-11 23:35:48 +00:00 · 2016-07-11 23:35:48 +00:00 · c37f15f5b5
commit c37f15f5b5
parent b4798b4f55
24 changed files with 147 additions and 93 deletions
--- a/test/CodeGen/AMDGPU/commute-shifts.ll
+++ b/test/CodeGen/AMDGPU/commute-shifts.ll
@ -25,5 +25,5 @@ declare <4 x float> @llvm.SI.image.load.v4i32(<4 x i32>, <8 x i32>, i32, i32, i3
 declare i32 @llvm.SI.packf16(float, float) #1
 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

-attributes #0 = { "enable-no-nans-fp-math"="true" }
+attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
--- a/test/CodeGen/AMDGPU/debugger-insert-nops.ll
+++ b/test/CodeGen/AMDGPU/debugger-insert-nops.ll
@ -34,7 +34,7 @@ entry:
 ; Function Attrs: nounwind readnone
 declare void @llvm.dbg.declare(metadata, metadata, metadata) #1

-attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="fiji" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }

 !llvm.dbg.cu = !{!0}
--- a/test/CodeGen/AMDGPU/debugger-reserve-regs.ll
+++ b/test/CodeGen/AMDGPU/debugger-reserve-regs.ll
@ -25,7 +25,7 @@ entry:
 ; Function Attrs: nounwind readnone
 declare void @llvm.dbg.declare(metadata, metadata, metadata) #1

-attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="fiji" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }

 !llvm.dbg.cu = !{!0}
--- a/test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll
+++ b/test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll
@ -67,4 +67,4 @@ for.end:                                          ; preds = %for.body

 attributes #0 = { nounwind readnone }
 attributes #1 = { convergent nounwind }
-attributes #2 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind }
--- a/test/CodeGen/AMDGPU/ds_read2.ll
+++ b/test/CodeGen/AMDGPU/ds_read2.ll
@ -508,6 +508,6 @@ declare i32 @llvm.amdgcn.workitem.id.y() #1
 ; Function Attrs: convergent nounwind
 declare void @llvm.amdgcn.s.barrier() #2

-attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
 attributes #2 = { convergent nounwind }
--- a/test/CodeGen/AMDGPU/ds_read2_superreg.ll
+++ b/test/CodeGen/AMDGPU/ds_read2_superreg.ll
@ -204,6 +204,6 @@ declare i32 @llvm.amdgcn.workitem.id.x() #1
 ; Function Attrs: nounwind readnone
 declare i32 @llvm.amdgcn.workitem.id.y() #1

-attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
 attributes #2 = { convergent nounwind }
--- a/test/CodeGen/AMDGPU/ds_read2st64.ll
+++ b/test/CodeGen/AMDGPU/ds_read2st64.ll
@ -258,5 +258,5 @@ declare i32 @llvm.amdgcn.workitem.id.x() #1
 ; Function Attrs: nounwind readnone
 declare i32 @llvm.amdgcn.workitem.id.y() #1

-attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
--- a/test/CodeGen/AMDGPU/ds_write2.ll
+++ b/test/CodeGen/AMDGPU/ds_write2.ll
@ -431,6 +431,6 @@ declare i32 @llvm.amdgcn.workitem.id.x() #1
 ; Function Attrs: nounwind readnone
 declare i32 @llvm.amdgcn.workitem.id.y() #1

-attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
 attributes #2 = { convergent nounwind }
--- a/test/CodeGen/AMDGPU/ds_write2st64.ll
+++ b/test/CodeGen/AMDGPU/ds_write2st64.ll
@ -103,6 +103,6 @@ declare i32 @llvm.amdgcn.workitem.id.x() #1
 ; Function Attrs: nounwind readnone
 declare i32 @llvm.amdgcn.workitem.id.y() #1

-attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
 attributes #2 = { convergent nounwind }
--- a/test/CodeGen/AMDGPU/fmed3.ll
+++ b/test/CodeGen/AMDGPU/fmed3.ll
@ -1,7 +1,7 @@
 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=NOSNAN -check-prefix=GCN %s
 ; RUN: llc -march=amdgcn -mattr=+fp-exceptions -verify-machineinstrs < %s | FileCheck -check-prefix=SNAN -check-prefix=GCN %s

-declare i32 @llvm.r600.read.tidig.x() #0
+declare i32 @llvm.amdgcn.workitem.id.x() #0
 declare float @llvm.minnum.f32(float, float) #0
 declare float @llvm.maxnum.f32(float, float) #0
 declare double @llvm.minnum.f64(double, double) #0
@ -13,7 +13,7 @@ declare double @llvm.maxnum.f64(double, double) #0
 ; SNAN: v_max_f32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}}
 ; SNAN: v_min_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}
 define void @v_test_fmed3_r_i_i_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
@ -31,7 +31,7 @@ define void @v_test_fmed3_r_i_i_f32(float addrspace(1)* %out, float addrspace(1)
 ; SNAN: v_max_f32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}}
 ; SNAN: v_min_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}
 define void @v_test_fmed3_r_i_i_commute0_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
@ -49,7 +49,7 @@ define void @v_test_fmed3_r_i_i_commute0_f32(float addrspace(1)* %out, float add
 ; SNAN: v_max_f32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}}
 ; SNAN: v_min_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}
 define void @v_test_fmed3_r_i_i_commute1_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
@ -65,7 +65,7 @@ define void @v_test_fmed3_r_i_i_commute1_f32(float addrspace(1)* %out, float add
 ; GCN: v_max_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}
 ; GCN: v_min_f32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}}
 define void @v_test_fmed3_r_i_i_constant_order_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
@ -82,7 +82,7 @@ define void @v_test_fmed3_r_i_i_constant_order_f32(float addrspace(1)* %out, flo
 ; GCN: v_max_f32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}}
 ; GCN: v_min_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}
 define void @v_test_fmed3_r_i_i_multi_use_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
@ -99,7 +99,7 @@ define void @v_test_fmed3_r_i_i_multi_use_f32(float addrspace(1)* %out, float ad
 ; GCN: v_max_f64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, 2.0
 ; GCN: v_min_f64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, 4.0
 define void @v_test_fmed3_r_i_i_f64(double addrspace(1)* %out, double addrspace(1)* %aptr) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr double, double addrspace(1)* %aptr, i32 %tid
  %outgep = getelementptr double, double addrspace(1)* %out, i32 %tid
  %a = load double, double addrspace(1)* %gep0
@ -114,7 +114,7 @@ define void @v_test_fmed3_r_i_i_f64(double addrspace(1)* %out, double addrspace(
 ; GCN-LABEL: {{^}}v_test_fmed3_r_i_i_no_nans_f32:
 ; GCN: v_med3_f32 v{{[0-9]+}}, v{{[0-9]+}}, 2.0, 4.0
 define void @v_test_fmed3_r_i_i_no_nans_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
@ -132,7 +132,7 @@ define void @v_test_fmed3_r_i_i_no_nans_f32(float addrspace(1)* %out, float addr
 ; SNAN: v_max_f32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}}
 ; SNAN: v_min_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}
 define void @v_test_legacy_fmed3_r_i_i_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
--- a/test/CodeGen/AMDGPU/fmul.ll
+++ b/test/CodeGen/AMDGPU/fmul.ll
@ -1,12 +1,11 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s

-
 ; FUNC-LABEL: {{^}}fmul_f32:
-; R600: MUL_IEEE {{\** *}}{{T[0-9]+\.[XYZW]}}, KC0[2].Z, KC0[2].W
+; GCN: v_mul_f32

-; SI: v_mul_f32
+; R600: MUL_IEEE {{\** *}}{{T[0-9]+\.[XYZW]}}, KC0[2].Z, KC0[2].W
 define void @fmul_f32(float addrspace(1)* %out, float %a, float %b) {
 entry:
  %0 = fmul float %a, %b
@ -19,11 +18,11 @@ declare float @llvm.R600.load.input(i32) readnone
 declare void @llvm.AMDGPU.store.output(float, i32)

 ; FUNC-LABEL: {{^}}fmul_v2f32:
-; R600: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}
-; R600: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}
+; GCN: v_mul_f32
+; GCN: v_mul_f32

-; SI: v_mul_f32
-; SI: v_mul_f32
+; R600: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}
+; R600: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}
 define void @fmul_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) {
 entry:
  %0 = fmul <2 x float> %a, %b
@ -32,15 +31,15 @@ entry:
 }

 ; FUNC-LABEL: {{^}}fmul_v4f32:
-; R600: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; R600: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; R600: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; R600: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; GCN: v_mul_f32
+; GCN: v_mul_f32
+; GCN: v_mul_f32
+; GCN: v_mul_f32

-; SI: v_mul_f32
-; SI: v_mul_f32
-; SI: v_mul_f32
-; SI: v_mul_f32
+; R600: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; R600: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; R600: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; R600: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 define void @fmul_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
  %b_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1
  %a = load <4 x float>, <4 x float> addrspace(1) * %in
@ -51,9 +50,9 @@ define void @fmul_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)
 }

 ; FUNC-LABEL: {{^}}test_mul_2_k:
-; SI: v_mul_f32
-; SI-NOT: v_mul_f32
-; SI: s_endpgm
+; GCN: v_mul_f32
+; GCN-NOT: v_mul_f32
+; GCN: s_endpgm
 define void @test_mul_2_k(float addrspace(1)* %out, float %x) #0 {
  %y = fmul float %x, 2.0
  %z = fmul float %y, 3.0
@ -62,10 +61,10 @@ define void @test_mul_2_k(float addrspace(1)* %out, float %x) #0 {
 }

 ; FUNC-LABEL: {{^}}test_mul_2_k_inv:
-; SI: v_mul_f32
-; SI-NOT: v_mul_f32
-; SI-NOT: v_mad_f32
-; SI: s_endpgm
+; GCN: v_mul_f32
+; GCN-NOT: v_mul_f32
+; GCN-NOT: v_mad_f32
+; GCN: s_endpgm
 define void @test_mul_2_k_inv(float addrspace(1)* %out, float %x) #0 {
  %y = fmul float %x, 3.0
  %z = fmul float %y, 2.0
@ -76,10 +75,10 @@ define void @test_mul_2_k_inv(float addrspace(1)* %out, float %x) #0 {
 ; There should be three multiplies here; %a should be used twice (once
 ; negated), not duplicated into mul x, 5.0 and mul x, -5.0.
 ; FUNC-LABEL: {{^}}test_mul_twouse:
-; SI: v_mul_f32
-; SI: v_mul_f32
-; SI: v_mul_f32
-; SI-NOT: v_mul_f32
+; GCN: v_mul_f32
+; GCN: v_mul_f32
+; GCN: v_mul_f32
+; GCN-NOT: v_mul_f32
 define void @test_mul_twouse(float addrspace(1)* %out, float %x, float %y) #0 {
  %a = fmul float %x, 5.0
  %b = fsub float -0.0, %a
@ -89,4 +88,4 @@ define void @test_mul_twouse(float addrspace(1)* %out, float %x, float %y) #0 {
  ret void
 }

-attributes #0 = { "less-precise-fpmad"="true" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "unsafe-fp-math"="true" }
+attributes #0 = { nounwind }
--- a/test/CodeGen/AMDGPU/llvm.SI.sendmsg-m0.ll
+++ b/test/CodeGen/AMDGPU/llvm.SI.sendmsg-m0.ll
@ -1,20 +1,17 @@
-;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=BOTH %s
-;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=VI --check-prefix=BOTH %s
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s

-; BOTH-LABEL: {{^}}main:
-; BOTH: s_mov_b32 m0, s0
+; GCN-LABEL: {{^}}main:
+; GCN: s_mov_b32 m0, s0
 ; VI-NEXT: s_nop 0
-; BOTH-NEXT: sendmsg(MSG_GS_DONE, GS_OP_NOP)
-; BOTH-NEXT: s_endpgm
+; GCN-NEXT: sendmsg(MSG_GS_DONE, GS_OP_NOP)
+; GCN-NEXT: s_endpgm

 define amdgpu_gs void @main(i32 inreg %a) #0 {
-main_body:
  call void @llvm.SI.sendmsg(i32 3, i32 %a)
  ret void
 }

-; Function Attrs: nounwind
-declare void @llvm.SI.sendmsg(i32, i32) #1
+declare void @llvm.SI.sendmsg(i32, i32) #0

-attributes #0 = { "unsafe-fp-math"="true" }
-attributes #1 = { nounwind }
+attributes #0 = { nounwind }
--- a/test/CodeGen/AMDGPU/llvm.dbg.value.ll
+++ b/test/CodeGen/AMDGPU/llvm.dbg.value.ll
@ -14,7 +14,7 @@ entry:

 declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1

-attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind  }
 attributes #1 = { nounwind readnone }

 !llvm.dbg.cu = !{!0}
--- a/test/CodeGen/AMDGPU/mubuf.ll
+++ b/test/CodeGen/AMDGPU/mubuf.ll
@ -174,8 +174,7 @@ define void @store_vgpr_ptr(i32 addrspace(1)* %out) #0 {
  ret void
 }

-declare i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #3
+declare i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #0
 declare void @llvm.SI.tbuffer.store.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)

-attributes #1 = { "unsafe-fp-math"="true" }
-attributes #3 = { nounwind readonly }
+attributes #0 = { nounwind readonly }
--- a/test/CodeGen/AMDGPU/sgpr-copy.ll
+++ b/test/CodeGen/AMDGPU/sgpr-copy.ll
@ -208,7 +208,7 @@ declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
 declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <8 x i32>, <16 x i8>, i32) #1

 ; Function Attrs: readnone
-declare float @llvm.amdgcn.rsq.f32(float) #3
+declare float @llvm.amdgcn.rsq.f32(float) #1

 declare float @llvm.exp2.f32(float) #1

@ -394,10 +394,9 @@ define amdgpu_ps void @mimg_ssamp_sgpr([17 x <4 x i32>] addrspace(2)* byval %arg

 declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1

-attributes #0 = { "unsafe-fp-math"="true" }
+attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
-attributes #2 = { readonly }
-attributes #3 = { readnone }
+attributes #2 = { nounwind readonly }

 !0 = !{!1, !1, i64 0, i32 1}
 !1 = !{!"const", null}
--- a/test/CodeGen/AMDGPU/si-spill-cf.ll
+++ b/test/CodeGen/AMDGPU/si-spill-cf.ll
@ -6,7 +6,7 @@

 ; SI: s_or_b64 exec, exec, [[SAVED:s\[[0-9]+:[0-9]+\]|[a-z]+]]
 ; SI-NOT: v_readlane_b32 [[SAVED]]
-define amdgpu_ps void @main() {
+define amdgpu_ps void @main() #0 {
 main_body:
  %0 = call float @llvm.SI.load.const(<16 x i8> undef, i32 16)
  %1 = call float @llvm.SI.load.const(<16 x i8> undef, i32 32)
@ -80,7 +80,7 @@ main_body:
 LOOP:                                             ; preds = %ENDIF2795, %main_body
  %temp894.0 = phi float [ 0.000000e+00, %main_body ], [ %temp894.1, %ENDIF2795 ]
  %temp18.0 = phi float [ undef, %main_body ], [ %temp18.1, %ENDIF2795 ]
-  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #2
+  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
  %67 = icmp sgt i32 %tid, 4
  br i1 %67, label %ENDLOOP, label %ENDIF

@ -490,25 +490,24 @@ ELSE2824:                                         ; preds = %ELSE2821
  br label %ENDIF2795
 }

-declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #2
+declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1

 ; Function Attrs: nounwind readnone
-declare float @llvm.SI.load.const(<16 x i8>, i32) #2
+declare float @llvm.SI.load.const(<16 x i8>, i32) #1

 ; Function Attrs: nounwind readnone
-declare float @llvm.floor.f32(float) #2
+declare float @llvm.floor.f32(float) #1

 ; Function Attrs: nounwind readnone
-declare float @llvm.sqrt.f32(float) #2
+declare float @llvm.sqrt.f32(float) #1

 ; Function Attrs: nounwind readnone
-declare float @llvm.minnum.f32(float, float) #2
+declare float @llvm.minnum.f32(float, float) #1

 ; Function Attrs: nounwind readnone
-declare float @llvm.maxnum.f32(float, float) #2
+declare float @llvm.maxnum.f32(float, float) #1

 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

-attributes #0 = { alwaysinline nounwind readnone }
-attributes #1 = { "enable-no-nans-fp-math"="true" }
-attributes #2 = { nounwind readnone }
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
--- a/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll
+++ b/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll
@ -229,5 +229,5 @@ define void @reorder_global_offsets(i32 addrspace(1)* nocapture %out, i32 addrsp
 ;   ret void
 ; }

-attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #0 = { nounwind }
 attributes #1 = { nounwind convergent }
--- a/test/CodeGen/AMDGPU/si-vector-hang.ll
+++ b/test/CodeGen/AMDGPU/si-vector-hang.ll
@ -90,7 +90,7 @@ entry:
  ret void
 }

-attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind }

 !opencl.kernels = !{!0, !1, !2, !3, !4, !5, !6, !7, !8}

--- a/test/CodeGen/AMDGPU/split-smrd.ll
+++ b/test/CodeGen/AMDGPU/split-smrd.ll
@ -1,4 +1,4 @@
-; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s

 ; FIXME: Move this to sgpr-copy.ll when this is fixed on VI.
 ; Make sure that when we split an smrd instruction in order to move it to
@ -38,7 +38,7 @@ declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>,

 declare i32 @llvm.SI.packf16(float, float) #1

-attributes #0 = { "unsafe-fp-math"="true" }
+attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }

 !0 = !{!1, !1, i64 0, i32 1}
--- a/test/CodeGen/AMDGPU/store.ll
+++ b/test/CodeGen/AMDGPU/store.ll
@ -380,4 +380,4 @@ entry:
  ret void
 }

-attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind }
--- a/test/CodeGen/AMDGPU/subreg-coalescer-crash.ll
+++ b/test/CodeGen/AMDGPU/subreg-coalescer-crash.ll
@ -105,5 +105,5 @@ declare i32 @llvm.SI.packf16(float, float) #1

 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

-attributes #0 = { "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" }
+attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
--- a/test/CodeGen/AMDGPU/v_mac.ll
+++ b/test/CodeGen/AMDGPU/v_mac.ll
@ -7,7 +7,7 @@
 ; GCN: buffer_load_dword [[C:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], 0 offset:8
 ; GCN: v_mac_f32_e32 [[C]], [[B]], [[A]]
 ; GCN: buffer_store_dword [[C]]
-define void @mac_vvv(float addrspace(1)* %out, float addrspace(1)* %in) {
+define void @mac_vvv(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
 entry:
  %b_ptr = getelementptr float, float addrspace(1)* %in, i32 1
  %c_ptr = getelementptr float, float addrspace(1)* %in, i32 2
@ -25,7 +25,7 @@ entry:
 ; GCN-LABEL: {{^}}mad_inline_sgpr_inline:
 ; GCN-NOT: v_mac_f32
 ; GCN: v_mad_f32 v{{[0-9]}}, s{{[0-9]+}}, 0.5, 0.5
-define void @mad_inline_sgpr_inline(float addrspace(1)* %out, float %in) {
+define void @mad_inline_sgpr_inline(float addrspace(1)* %out, float %in) #0 {
 entry:
  %tmp0 = fmul float 0.5, %in
  %tmp1 = fadd float %tmp0, 0.5
@ -36,7 +36,7 @@ entry:
 ; GCN-LABEL: {{^}}mad_vvs:
 ; GCN-NOT: v_mac_f32
 ; GCN: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}
-define void @mad_vvs(float addrspace(1)* %out, float addrspace(1)* %in, float %c) {
+define void @mad_vvs(float addrspace(1)* %out, float addrspace(1)* %in, float %c) #0 {
 entry:
  %b_ptr = getelementptr float, float addrspace(1)* %in, i32 1

@ -51,7 +51,7 @@ entry:

 ; GCN-LABEL: {{^}}mac_ssv:
 ; GCN: v_mac_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
-define void @mac_ssv(float addrspace(1)* %out, float addrspace(1)* %in, float %a) {
+define void @mac_ssv(float addrspace(1)* %out, float addrspace(1)* %in, float %a) #0 {
 entry:
  %c = load float, float addrspace(1)* %in

@ -64,7 +64,7 @@ entry:
 ; GCN-LABEL: {{^}}mac_mad_same_add:
 ; GCN: v_mad_f32 v{{[0-9]}}, v{{[0-9]+}}, v{{[0-9]+}}, [[ADD:v[0-9]+]]
 ; GCN: v_mac_f32_e32 [[ADD]], v{{[0-9]+}}, v{{[0-9]+}}
-define void @mac_mad_same_add(float addrspace(1)* %out, float addrspace(1)* %in) {
+define void @mac_mad_same_add(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
 entry:
  %b_ptr = getelementptr float, float addrspace(1)* %in, i32 1
  %c_ptr = getelementptr float, float addrspace(1)* %in, i32 2
@ -104,6 +104,46 @@ entry:
  %b = load float, float addrspace(1)* %b_ptr
  %c = load float, float addrspace(1)* %c_ptr

+  %neg_a = fsub float -0.0, %a
+  %tmp0 = fmul float %neg_a, %b
+  %tmp1 = fadd float %tmp0, %c
+
+  store float %tmp1, float addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}unsafe_mad_sub0_src0:
+; GCN-NOT: v_mac_f32
+; GCN: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}}
+define void @unsafe_mad_sub0_src0(float addrspace(1)* %out, float addrspace(1)* %in) #1 {
+entry:
+  %b_ptr = getelementptr float, float addrspace(1)* %in, i32 1
+  %c_ptr = getelementptr float, float addrspace(1)* %in, i32 2
+
+  %a = load float, float addrspace(1)* %in
+  %b = load float, float addrspace(1)* %b_ptr
+  %c = load float, float addrspace(1)* %c_ptr
+
+  %neg_a = fsub float 0.0, %a
+  %tmp0 = fmul float %neg_a, %b
+  %tmp1 = fadd float %tmp0, %c
+
+  store float %tmp1, float addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}safe_mad_sub0_src0:
+; GCN: v_sub_f32_e32 [[SUB0:v[0-9]+]], 0,
+; GCN: v_mac_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[SUB0]]
+define void @safe_mad_sub0_src0(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+entry:
+  %b_ptr = getelementptr float, float addrspace(1)* %in, i32 1
+  %c_ptr = getelementptr float, float addrspace(1)* %in, i32 2
+
+  %a = load float, float addrspace(1)* %in
+  %b = load float, float addrspace(1)* %b_ptr
+  %c = load float, float addrspace(1)* %c_ptr
+
  %neg_a = fsub float 0.0, %a
  %tmp0 = fmul float %neg_a, %b
  %tmp1 = fadd float %tmp0, %c
@ -124,6 +164,26 @@ entry:
  %b = load float, float addrspace(1)* %b_ptr
  %c = load float, float addrspace(1)* %c_ptr

+  %neg_b = fsub float -0.0, %b
+  %tmp0 = fmul float %a, %neg_b
+  %tmp1 = fadd float %tmp0, %c
+
+  store float %tmp1, float addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}unsafe_mad_sub0_src1:
+; GCN-NOT: v_mac_f32
+; GCN: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}}
+define void @unsafe_mad_sub0_src1(float addrspace(1)* %out, float addrspace(1)* %in) #1 {
+entry:
+  %b_ptr = getelementptr float, float addrspace(1)* %in, i32 1
+  %c_ptr = getelementptr float, float addrspace(1)* %in, i32 2
+
+  %a = load float, float addrspace(1)* %in
+  %b = load float, float addrspace(1)* %b_ptr
+  %c = load float, float addrspace(1)* %c_ptr
+
  %neg_b = fsub float 0.0, %b
  %tmp0 = fmul float %a, %neg_b
  %tmp1 = fadd float %tmp0, %c
@ -144,7 +204,7 @@ entry:
  %b = load float, float addrspace(1)* %b_ptr
  %c = load float, float addrspace(1)* %c_ptr

-  %neg_c = fsub float 0.0, %c
+  %neg_c = fsub float -0.0, %c
  %tmp0 = fmul float %a, %b
  %tmp1 = fadd float %tmp0, %neg_c

@ -152,4 +212,5 @@ entry:
  ret void
 }

-attributes #0 = { "true" "unsafe-fp-math"="true" }
+attributes #0 = { nounwind "unsafe-fp-math"="false" }
+attributes #1 = { nounwind "unsafe-fp-math"="true" }
--- a/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll
+++ b/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll
@ -26,7 +26,7 @@
 ; GCN: NumVgprs: 256
 ; GCN: ScratchSize: 1024

-define amdgpu_vs void @main([9 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg1, [17 x <4 x i32>] addrspace(2)* byval %arg2, [34 x <8 x i32>] addrspace(2)* byval %arg3, [16 x <16 x i8>] addrspace(2)* byval %arg4, i32 inreg %arg5, i32 inreg %arg6, i32 %arg7, i32 %arg8, i32 %arg9, i32 %arg10) {
+define amdgpu_vs void @main([9 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg1, [17 x <4 x i32>] addrspace(2)* byval %arg2, [34 x <8 x i32>] addrspace(2)* byval %arg3, [16 x <16 x i8>] addrspace(2)* byval %arg4, i32 inreg %arg5, i32 inreg %arg6, i32 %arg7, i32 %arg8, i32 %arg9, i32 %arg10) #0 {
 bb:
  %tmp = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %arg1, i64 0, i64 0
  %tmp11 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, align 16, !tbaa !0
@ -493,7 +493,7 @@ declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float

 declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1

-attributes #0 = { "enable-no-nans-fp-math"="true" }
+attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }

 !0 = !{!1, !1, i64 0, i32 1}
--- a/test/CodeGen/AMDGPU/wrong-transalu-pos-fix.ll
+++ b/test/CodeGen/AMDGPU/wrong-transalu-pos-fix.ll
@ -71,7 +71,7 @@ declare i32 @llvm.r600.read.global.size.y() #1
 ; Function Attrs: nounwind readnone
 declare i32 @llvm.r600.read.global.size.z() #1

-attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }

 !opencl.kernels = !{!0, !1, !2}