[llvm] Fix missing FileCheck directive colons

https://reviews.llvm.org/D77352
2025-01-31 12:41:49 +01:00 · 2020-04-02 16:28:32 -06:00 · 2020-04-02 16:28:32 -06:00 · c9acf39233
commit c9acf39233
parent 6f09e0c039
95 changed files with 189 additions and 199 deletions
--- a/test/Analysis/AliasSet/saturation.ll
+++ b/test/Analysis/AliasSet/saturation.ll
@ -18,7 +18,7 @@ define void @allmust() {
  ret void
 }

-; CHECK-LABEL :'mergemay'
+; CHECK-LABEL: 'mergemay'
 ; NOSAT: AliasSet[{{.*}}, 2] may alias, Mod Pointers: (i32* %a, LocationSize::precise(4)), (i32* %a1, LocationSize::precise(4))
 ; NOSAT: AliasSet[{{.*}}, 1] must alias, Mod Pointers: (i32* %b, LocationSize::precise(4))
 ; SAT: AliasSet[{{.*}}, 2] may alias, Mod forwarding to 0x[[FWD:[0-9a-f]*]]
--- a/test/Analysis/CostModel/AMDGPU/fmul.ll
+++ b/test/Analysis/CostModel/AMDGPU/fmul.ll
@ -3,7 +3,7 @@
 ; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=FASTF64,FASTF16,ALL %s
 ; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mattr=-half-rate-64-ops < %s | FileCheck -check-prefixes=SLOWF64,SLOWF16,ALL %s

-; ALL: 'fmul_f32'
+; ALL-LABEL: 'fmul_f32'
 ; ALL: estimated cost of 1 for {{.*}} fmul float
 define amdgpu_kernel void @fmul_f32(float addrspace(1)* %out, float addrspace(1)* %vaddr, float %b) #0 {
  %vec = load float, float addrspace(1)* %vaddr
@ -12,7 +12,7 @@ define amdgpu_kernel void @fmul_f32(float addrspace(1)* %out, float addrspace(1)
  ret void
 }

-; ALL: 'fmul_v2f32'
+; ALL-LABEL: 'fmul_v2f32'
 ; ALL: estimated cost of 2 for {{.*}} fmul <2 x float>
 define amdgpu_kernel void @fmul_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %vaddr, <2 x float> %b) #0 {
  %vec = load <2 x float>, <2 x float> addrspace(1)* %vaddr
@ -21,7 +21,7 @@ define amdgpu_kernel void @fmul_v2f32(<2 x float> addrspace(1)* %out, <2 x float
  ret void
 }

-; ALL: 'fmul_v3f32'
+; ALL-LABEL: 'fmul_v3f32'
 ; Allow for 4 when v3f32 is illegal and TargetLowering thinks it needs widening,
 ; and 3 when it is legal.
 ; ALL: estimated cost of {{[34]}} for {{.*}} fmul <3 x float>
@ -32,7 +32,7 @@ define amdgpu_kernel void @fmul_v3f32(<3 x float> addrspace(1)* %out, <3 x float
  ret void
 }

-; ALL: 'fmul_v5f32'
+; ALL-LABEL: 'fmul_v5f32'
 ; Allow for 8 when v5f32 is illegal and TargetLowering thinks it needs widening,
 ; and 5 when it is legal.
 ; ALL: estimated cost of {{[58]}} for {{.*}} fmul <5 x float>
@ -43,7 +43,7 @@ define amdgpu_kernel void @fmul_v5f32(<5 x float> addrspace(1)* %out, <5 x float
  ret void
 }

-; ALL: 'fmul_f64'
+; ALL-LABEL: 'fmul_f64'
 ; FASTF64: estimated cost of 2 for {{.*}} fmul double
 ; SLOWF64: estimated cost of 3 for {{.*}} fmul double
 define amdgpu_kernel void @fmul_f64(double addrspace(1)* %out, double addrspace(1)* %vaddr, double %b) #0 {
@ -53,7 +53,7 @@ define amdgpu_kernel void @fmul_f64(double addrspace(1)* %out, double addrspace(
  ret void
 }

-; ALL: 'fmul_v2f64'
+; ALL-LABEL: 'fmul_v2f64'
 ; FASTF64: estimated cost of 4 for {{.*}} fmul <2 x double>
 ; SLOWF64: estimated cost of 6 for {{.*}} fmul <2 x double>
 define amdgpu_kernel void @fmul_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %vaddr, <2 x double> %b) #0 {
@ -63,7 +63,7 @@ define amdgpu_kernel void @fmul_v2f64(<2 x double> addrspace(1)* %out, <2 x doub
  ret void
 }

-; ALL: 'fmul_v3f64'
+; ALL-LABEL: 'fmul_v3f64'
 ; FASTF64: estimated cost of 6 for {{.*}} fmul <3 x double>
 ; SLOWF64: estimated cost of 9 for {{.*}} fmul <3 x double>
 define amdgpu_kernel void @fmul_v3f64(<3 x double> addrspace(1)* %out, <3 x double> addrspace(1)* %vaddr, <3 x double> %b) #0 {
@ -73,7 +73,7 @@ define amdgpu_kernel void @fmul_v3f64(<3 x double> addrspace(1)* %out, <3 x doub
  ret void
 }

-; ALL: 'fmul_f16'
+; ALL-LABEL: 'fmul_f16'
 ; ALL: estimated cost of 1 for {{.*}} fmul half
 define amdgpu_kernel void @fmul_f16(half addrspace(1)* %out, half addrspace(1)* %vaddr, half %b) #0 {
  %vec = load half, half addrspace(1)* %vaddr
@ -82,9 +82,9 @@ define amdgpu_kernel void @fmul_f16(half addrspace(1)* %out, half addrspace(1)*
  ret void
 }

-; ALL: 'fmul_v2f16'
-; SLOWF16 estimated cost of 2 for {{.*}} fmul <2 x half>
-; FASTF16 estimated cost of 1 for {{.*}} fmul <2 x half>
+; ALL-LABEL: 'fmul_v2f16'
+; SLOWF16: estimated cost of 2 for {{.*}} fmul <2 x half>
+; FASTF16: estimated cost of 1 for {{.*}} fmul <2 x half>
 define amdgpu_kernel void @fmul_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %vaddr, <2 x half> %b) #0 {
  %vec = load <2 x half>, <2 x half> addrspace(1)* %vaddr
  %add = fmul <2 x half> %vec, %b
@ -92,9 +92,9 @@ define amdgpu_kernel void @fmul_v2f16(<2 x half> addrspace(1)* %out, <2 x half>
  ret void
 }

-; ALL: 'fmul_v3f16'
-; SLOWF16 estimated cost of 4 for {{.*}} fmul <3 x half>
-; FASTF16 estimated cost of 2 for {{.*}} fmul <3 x half>
+; ALL-LABEL: 'fmul_v3f16'
+; SLOWF16: estimated cost of 4 for {{.*}} fmul <3 x half>
+; FASTF16: estimated cost of 2 for {{.*}} fmul <3 x half>
 define amdgpu_kernel void @fmul_v3f16(<3 x half> addrspace(1)* %out, <3 x half> addrspace(1)* %vaddr, <3 x half> %b) #0 {
  %vec = load <3 x half>, <3 x half> addrspace(1)* %vaddr
  %add = fmul <3 x half> %vec, %b
@ -102,7 +102,7 @@ define amdgpu_kernel void @fmul_v3f16(<3 x half> addrspace(1)* %out, <3 x half>
  ret void
 }

-; ALL: 'fmul_v4f16'
+; ALL-LABEL: 'fmul_v4f16'
 ; SLOWF16: estimated cost of 4 for {{.*}} fmul <4 x half>
 ; FASTF16: estimated cost of 2 for {{.*}} fmul <4 x half>
 define amdgpu_kernel void @fmul_v4f16(<4 x half> addrspace(1)* %out, <4 x half> addrspace(1)* %vaddr, <4 x half> %b) #0 {
--- a/test/Analysis/MemorySSA/volatile-clobber.ll
+++ b/test/Analysis/MemorySSA/volatile-clobber.ll
@ -23,7 +23,7 @@ define i32 @foo() {
 }

 ; Ensuring we allow hoisting nonvolatile loads around volatile loads.
-; CHECK-LABEL define void @volatile_only
+; CHECK-LABEL: define void @volatile_only
 define void @volatile_only(i32* %arg1, i32* %arg2) {
  ; Trivially NoAlias/MustAlias
  %a = alloca i32
@ -51,7 +51,7 @@ define void @volatile_only(i32* %arg1, i32* %arg2) {
 }

 ; Ensuring that volatile atomic operations work properly.
-; CHECK-LABEL define void @volatile_atomics
+; CHECK-LABEL: define void @volatile_atomics
 define void @volatile_atomics(i32* %arg1, i32* %arg2) {
  %a = alloca i32
  %b = alloca i32
--- a/test/Analysis/ValueTracking/known-nonnull-at.ll
+++ b/test/Analysis/ValueTracking/known-nonnull-at.ll
@ -170,7 +170,6 @@ define i32 @test_null_after_load_addrspace(i32 addrspace(1)* %0) {
 ; CHECK-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 1
 ; CHECK-NEXT:    ret i32 [[TMP4]]
 ;
-; CHECK-NEXT     ret i32 %4
  %2 = load i32, i32 addrspace(1)* %0, align 4
  %3 = icmp eq i32 addrspace(1)* %0, null
  %4 = select i1 %3, i32 %2, i32 1
--- a/test/Assembler/auto_upgrade_nvvm_intrinsics.ll
+++ b/test/Assembler/auto_upgrade_nvvm_intrinsics.ll
@ -49,7 +49,7 @@ define void @simple_upgrade(i32 %a, i64 %b, i16 %c) {
  ret void
 }

-; CHECK-LABEL @abs
+; CHECK-LABEL: @abs
 define void @abs(i32 %a, i64 %b) {
 ; CHECK-DAG: [[negi:%[a-zA-Z0-9.]+]] = sub i32 0, %a
 ; CHECK-DAG: [[cmpi:%[a-zA-Z0-9.]+]] = icmp sge i32 %a, 0
--- a/test/Assembler/immarg-param-attribute.ll
+++ b/test/Assembler/immarg-param-attribute.ll
@ -14,7 +14,7 @@ define void @call_llvm.test.immarg.intrinsic.i32() {
  ; CHECK: call void @llvm.test.immarg.intrinsic.i32(i32 0)
  call void @llvm.test.immarg.intrinsic.i32(i32 0)

-  ; CHECK call void @llvm.test.immarg.intrinsic.i32(i32 1)
+  ; CHECK: call void @llvm.test.immarg.intrinsic.i32(i32 1)
  call void @llvm.test.immarg.intrinsic.i32(i32 1)

  ; CHECK: call void @llvm.test.immarg.intrinsic.i32(i32 5)
--- a/test/CodeGen/AArch64/arm64-neon-v8.1a.ll
+++ b/test/CodeGen/AArch64/arm64-neon-v8.1a.ll
@ -120,7 +120,7 @@ entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %prod = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %x, <4 x i16> %shuffle)
  %retval =  call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> %acc, <4 x i16> %prod)
-; CHECK-V8a :       sqrdmulh    v1.4h, v1.4h, v2.h[3]
+; CHECK-V8a:        sqrdmulh    v1.4h, v1.4h, v2.h[3]
 ; CHECK-V81a:       sqrdmlah    v0.4h, v1.4h, v2.h[3]
 ; CHECK-V81a-apple: sqrdmlah.4h v0,    v1,    v2[3]
  ret <4 x i16> %retval
--- a/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll
+++ b/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll
@ -817,7 +817,7 @@ define %v4f32 @test_v4f32.fabs(%v4f32 %a) {
 ; GISEL: test_v4f32.floor:
 define %v4f32 @test_v4f32.floor(%v4f32 %a) {
  ; CHECK: frintm.4s
-  ; GISEL frintm.4s
+  ; GISEL: frintm.4s
  %1 = call %v4f32 @llvm.floor.v4f32(%v4f32 %a)
  ret %v4f32 %1
 }
--- a/test/CodeGen/AArch64/note-gnu-property-pac-bti-0.ll
+++ b/test/CodeGen/AArch64/note-gnu-property-pac-bti-0.ll
@ -9,6 +9,6 @@ attributes #0 = { "branch-target-enforcement" }
 ; Both attributes present in a file with no functions.
 ; ASM:	    .word	3221225472
 ; ASM-NEXT:	.word	4
-; ASM-NEXT	.word	3
+; ASM-NEXT:	.word	3

 ; OBJ: Properties: aarch64 feature: BTI, PAC
--- a/test/CodeGen/AArch64/note-gnu-property-pac-bti-1.ll
+++ b/test/CodeGen/AArch64/note-gnu-property-pac-bti-1.ll
@ -13,6 +13,6 @@ attributes #0 = { "branch-target-enforcement" }
 ; BTI attribute present
 ; ASM:	    .word	3221225472
 ; ASM-NEXT:	.word	4
-; ASM-NEXT	.word	1
+; ASM-NEXT:	.word	1

 ; OBJ: Properties: aarch64 feature: BTI
--- a/test/CodeGen/AArch64/note-gnu-property-pac-bti-2.ll
+++ b/test/CodeGen/AArch64/note-gnu-property-pac-bti-2.ll
@ -13,6 +13,6 @@ attributes #0 = { "sign-return-address"="all" }
 ; PAC attribute present
 ; ASM:	    .word	3221225472
 ; ASM-NEXT:	.word	4
-; ASM-NEXT	.word	2
+; ASM-NEXT:	.word	2

 ; OBJ: Properties: aarch64 feature: PAC
--- a/test/CodeGen/AArch64/note-gnu-property-pac-bti-3.ll
+++ b/test/CodeGen/AArch64/note-gnu-property-pac-bti-3.ll
@ -13,6 +13,6 @@ attributes #0 = { "branch-target-enforcement" "sign-return-address"="non-leaf" }
 ; Both attribute present
 ; ASM:	    .word	3221225472
 ; ASM-NEXT:	.word	4
-; ASM-NEXT	.word	3
+; ASM-NEXT:	.word	3

 ; OBJ: Properties: aarch64 feature: BTI, PAC
--- a/test/CodeGen/AArch64/note-gnu-property-pac-bti-4.ll
+++ b/test/CodeGen/AArch64/note-gnu-property-pac-bti-4.ll
@ -20,6 +20,6 @@ attributes #1 = { "branch-target-enforcement" }
 ; Only the common atttribute (BTI)
 ; ASM:	    .word	3221225472
 ; ASM-NEXT:	.word	4
-; ASM-NEXT	.word	1
+; ASM-NEXT:	.word	1

 ; OBJ: Properties: aarch64 feature: BTI
--- a/test/CodeGen/AArch64/note-gnu-property-pac-bti-5.ll
+++ b/test/CodeGen/AArch64/note-gnu-property-pac-bti-5.ll
@ -21,6 +21,6 @@ attributes #1 = { "sign-return-address"="all" }
 ; ASM: warning: not setting BTI in feature flags
 ; ASM:	    .word	3221225472
 ; ASM-NEXT:	.word	4
-; ASM-NEXT	.word	2
+; ASM-NEXT:	.word	2

 ; OBJ: Properties: aarch64 feature: PAC
--- a/test/CodeGen/AArch64/note-gnu-property-pac-bti-8.ll
+++ b/test/CodeGen/AArch64/note-gnu-property-pac-bti-8.ll
@ -16,6 +16,6 @@ attributes #0 = { "branch-target-enforcement" }
 ; Declarations don't prevent setting BTI
 ; ASM:	    .word	3221225472
 ; ASM-NEXT:	.word	4
-; ASM-NEXT	.word	1
+; ASM-NEXT:	.word	1

 ; OBJ: Properties: aarch64 feature: BTI
--- a/test/CodeGen/AArch64/stp-opt-with-renaming.mir
+++ b/test/CodeGen/AArch64/stp-opt-with-renaming.mir
@ -175,7 +175,7 @@ body:             |
 ...
 ---
 # CHECK-LABEL: name: test6
-# CHECK-LABEL  bb.0:
+# CHECK-LABEL: bb.0:
 # CHECK:    liveins: $x0, $x1, $q3

 # CHECK:         renamable $q9 = LDRQui $x0, 0 :: (load 16)
@ -288,7 +288,7 @@ body:             |
 # TODO: Can we check that all instructions that require renaming also support
 #       the second 16 Q registers?
 # CHECK-LABEL: name: test9
-# CHECK-LABEL  bb.0:
+# CHECK-LABEL: bb.0:
 # CHECK:    liveins: $x0, $x1, $q0, $q1, $q2, $q3, $q4, $q5, $q6, $q7

 # CHECK:         renamable $q9 = LDRQui $x0, 0 :: (load 16)
@ -322,7 +322,7 @@ body:             |
 ---
 # The livein $q7 is killed early, so we can re-use it for renaming.
 # CHECK-LABEL: name: test10
-# CHECK-LABEL  bb.0:
+# CHECK-LABEL: bb.0:
 # CHECK:    liveins: $x0, $x1, $q0, $q1, $q2, $q3, $q4, $q5, $q6, $q7

 # CHECK:         renamable $q7 = FADDv2f64 renamable $q7, renamable $q7
--- a/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workgroup.id.ll
+++ b/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workgroup.id.ll
@ -9,7 +9,7 @@ declare i32 @llvm.amdgcn.workgroup.id.x() #0
 declare i32 @llvm.amdgcn.workgroup.id.y() #0
 declare i32 @llvm.amdgcn.workgroup.id.z() #0

-; ALL-LABEL {{^}}test_workgroup_id_x:
+; ALL-LABEL: {{^}}test_workgroup_id_x:

 ; CO-V2: .amd_kernel_code_t
 ; CO-V2: user_sgpr_count = 6
@ -40,7 +40,7 @@ define amdgpu_kernel void @test_workgroup_id_x(i32 addrspace(1)* %out) #1 {
  ret void
 }

-; ALL-LABEL {{^}}test_workgroup_id_y:
+; ALL-LABEL: {{^}}test_workgroup_id_y:
 ; CO-V2: user_sgpr_count = 6
 ; CO-V2: enable_sgpr_workgroup_id_x = 1
 ; CO-V2: enable_sgpr_workgroup_id_y = 1
@ -67,7 +67,7 @@ define amdgpu_kernel void @test_workgroup_id_y(i32 addrspace(1)* %out) #1 {
  ret void
 }

-; ALL-LABEL {{^}}test_workgroup_id_z:
+; ALL-LABEL: {{^}}test_workgroup_id_z:
 ; CO-V2: user_sgpr_count = 6
 ; CO-V2: enable_sgpr_workgroup_id_x = 1
 ; CO-V2: enable_sgpr_workgroup_id_y = 0
--- a/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll
+++ b/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll
@ -13,7 +13,7 @@ declare i32 @llvm.amdgcn.workitem.id.z() #0
 ; MESA: .long 47180
 ; MESA-NEXT: .long 132{{$}}

-; ALL-LABEL {{^}}test_workitem_id_x:
+; ALL-LABEL: {{^}}test_workitem_id_x:
 ; CO-V2: enable_vgpr_workitem_id = 0

 ; ALL-NOT: v0
@ -28,7 +28,7 @@ define amdgpu_kernel void @test_workitem_id_x(i32 addrspace(1)* %out) #1 {
 ; MESA: .long 47180
 ; MESA-NEXT: .long 2180{{$}}

-; ALL-LABEL {{^}}test_workitem_id_y:
+; ALL-LABEL: {{^}}test_workitem_id_y:
 ; CO-V2: enable_vgpr_workitem_id = 1

 ; ALL-NOT: v1
@ -43,7 +43,7 @@ define amdgpu_kernel void @test_workitem_id_y(i32 addrspace(1)* %out) #1 {
 ; MESA: .long 47180
 ; MESA-NEXT: .long 4228{{$}}

-; ALL-LABEL {{^}}test_workitem_id_z:
+; ALL-LABEL: {{^}}test_workitem_id_z:
 ; CO-V2: enable_vgpr_workitem_id = 2

 ; ALL-NOT: v2
--- a/test/CodeGen/AMDGPU/branch-relaxation.ll
+++ b/test/CodeGen/AMDGPU/branch-relaxation.ll
@ -296,7 +296,7 @@ bb4:
 ; GCN-NEXT: s_sub_u32 s[[PC_LO]], s[[PC_LO]], ([[LONGBB]]+4)-[[LOOP]]
 ; GCN-NEXT: s_subb_u32 s[[PC_HI]], s[[PC_HI]], 0{{$}}
 ; GCN-NEXT: s_setpc_b64 s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
-; GCN-NEXT .Lfunc_end{{[0-9]+}}:
+; GCN-NEXT: .Lfunc_end{{[0-9]+}}:
 define amdgpu_kernel void @uniform_unconditional_min_long_backward_branch(i32 addrspace(1)* %arg, i32 %arg1) {
 entry:
  br label %loop
--- a/test/CodeGen/AMDGPU/call-argument-types.ll
+++ b/test/CodeGen/AMDGPU/call-argument-types.ll
@ -534,7 +534,7 @@ define amdgpu_kernel void @test_call_external_void_func_v4i32_imm() #0 {
 ; GCN-DAG: v_mov_b32_e32 v2, 3
 ; GCN-DAG: v_mov_b32_e32 v3, 4
 ; GCN-DAG: v_mov_b32_e32 v4, 5
-; GCN-NOT v5,
+; GCN-NOT: v5,
 ; GCN: s_swappc_b64
 define amdgpu_kernel void @test_call_external_void_func_v5i32_imm() #0 {
  call void @external_void_func_v5i32(<5 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5>)
--- a/test/CodeGen/AMDGPU/ds_read2.ll
+++ b/test/CodeGen/AMDGPU/ds_read2.ll
@ -57,7 +57,7 @@ define amdgpu_kernel void @simple_read2_f32_max_offset(float addrspace(1)* %out)
 ; CI-DAG: s_mov_b32 m0
 ; GFX9-NOT: m0

-; GCN-NOT ds_read2_b32
+; GCN-NOT: ds_read2_b32
 ; GCN: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}}
 ; GCN: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:1028
 ; GCN: s_endpgm
@ -272,7 +272,7 @@ define amdgpu_kernel void @read2_ptr_is_subreg_f32(float addrspace(1)* %out) #0
 ; CI-DAG: s_mov_b32 m0
 ; GFX9-NOT: m0

-; GCN-NOT ds_read2_b32
+; GCN-NOT: ds_read2_b32
 ; GCN: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}}
 ; GCN: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:32
 ; GCN: s_endpgm
--- a/test/CodeGen/AMDGPU/flat-offset-bug.ll
+++ b/test/CodeGen/AMDGPU/flat-offset-bug.ll
@ -26,7 +26,7 @@ define void @global_inst_offset(i32 addrspace(1)* nocapture %p) {
 }

 ; GCN-LABEL: load_i16_lo:
-; GFX9 : flat_load_short_d16 v{{[0-9]+}}, v[{{[0-9:]+}}] offset:8{{$}}
+; GFX9:  flat_load_short_d16 v{{[0-9]+}}, v[{{[0-9:]+}}] offset:8{{$}}
 ; GFX10: flat_load_short_d16 v{{[0-9]+}}, v[{{[0-9:]+}}]{{$}}
 define amdgpu_kernel void @load_i16_lo(i16* %arg, <2 x i16>* %out) {
  %gep = getelementptr inbounds i16, i16* %arg, i32 4
--- a/test/CodeGen/AMDGPU/fmuladd.f32.ll
+++ b/test/CodeGen/AMDGPU/fmuladd.f32.ll
@ -81,7 +81,7 @@ define amdgpu_kernel void @fmul_fadd_f32(float addrspace(1)* %out, float addrspa
 ; GCN-DENORM-SLOWFMA: v_add_f32_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]]
 ; GCN-DENORM-SLOWFMA: v_add_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[R2]]

-; SI-DENORM buffer_store_dword [[RESULT]]
+; SI-DENORM: buffer_store_dword [[RESULT]]
 ; VI-DENORM: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
 define amdgpu_kernel void @fmuladd_2.0_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
--- a/test/CodeGen/AMDGPU/function-args.ll
+++ b/test/CodeGen/AMDGPU/function-args.ll
@ -596,7 +596,7 @@ define void @void_func_v32i32_i32_i64(<32 x i32> %arg0, i32 %arg1, i64 %arg2) #0
 ; GCN: buffer_store_byte [[TRUNC_ARG1_I1]], off
 ; GCN: buffer_store_byte [[LOAD_ARG2]], off
 ; GCN: buffer_store_short [[LOAD_ARG3]], off
-; GFX89 buffer_store_short [[LOAD_ARG4]], off
+; GFX89: buffer_store_short [[LOAD_ARG4]], off

 ; CI: buffer_store_short [[CVT_ARG4]], off
 define void @void_func_v32i32_i1_i8_i16(<32 x i32> %arg0, i1 %arg1, i8 %arg2, i16 %arg3, half %arg4) #0 {
--- a/test/CodeGen/AMDGPU/illegal-sgpr-to-vgpr-copy.ll
+++ b/test/CodeGen/AMDGPU/illegal-sgpr-to-vgpr-copy.ll
@ -34,7 +34,7 @@ define amdgpu_kernel void @illegal_vgpr_to_sgpr_copy_v8i32() #0 {
  ret void
 }

-; ERR error: <unknown>:0:0: in function illegal_vgpr_to_sgpr_copy_v16i32 void (): illegal SGPR to VGPR copy
+; ERR: error: <unknown>:0:0: in function illegal_vgpr_to_sgpr_copy_v16i32 void (): illegal SGPR to VGPR copy
 ; GCN: ; illegal copy v[0:15] to s[16:31]
 define amdgpu_kernel void @illegal_vgpr_to_sgpr_copy_v16i32() #0 {
  %vgpr = call <16 x i32> asm sideeffect "; def $0", "=${v[0:15]}"()
--- a/test/CodeGen/AMDGPU/insert-skip-from-vcc.mir
+++ b/test/CodeGen/AMDGPU/insert-skip-from-vcc.mir
@ -305,7 +305,7 @@ body:             |
 # GCN-LABEL: name: and_execz_imm_vccz_liveout_scc
 # GCN:      $vcc = S_AND_B64 $exec, -1, implicit-def $scc
 # GCN-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec
-# GCN-NEXT  S_ENDPGM 0, implicit $scc
+# GCN-NEXT: S_ENDPGM 0, implicit $scc
 name:            and_execz_imm_vccz_liveout_scc
 body:             |
  bb.0:
--- a/test/CodeGen/AMDGPU/llvm.amdgcn.workgroup.id.ll
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.workgroup.id.ll
@ -9,7 +9,7 @@ declare i32 @llvm.amdgcn.workgroup.id.x() #0
 declare i32 @llvm.amdgcn.workgroup.id.y() #0
 declare i32 @llvm.amdgcn.workgroup.id.z() #0

-; ALL-LABEL {{^}}test_workgroup_id_x:
+; ALL-LABEL: {{^}}test_workgroup_id_x:

 ; CO-V2: .amd_kernel_code_t
 ; CO-V2: user_sgpr_count = 6
@ -40,7 +40,7 @@ define amdgpu_kernel void @test_workgroup_id_x(i32 addrspace(1)* %out) #1 {
  ret void
 }

-; ALL-LABEL {{^}}test_workgroup_id_y:
+; ALL-LABEL: {{^}}test_workgroup_id_y:
 ; CO-V2: user_sgpr_count = 6
 ; CO-V2: enable_sgpr_workgroup_id_x = 1
 ; CO-V2: enable_sgpr_workgroup_id_y = 1
@ -67,7 +67,7 @@ define amdgpu_kernel void @test_workgroup_id_y(i32 addrspace(1)* %out) #1 {
  ret void
 }

-; ALL-LABEL {{^}}test_workgroup_id_z:
+; ALL-LABEL: {{^}}test_workgroup_id_z:
 ; CO-V2: user_sgpr_count = 6
 ; CO-V2: enable_sgpr_workgroup_id_x = 1
 ; CO-V2: enable_sgpr_workgroup_id_y = 0
--- a/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll
@ -13,7 +13,7 @@ declare i32 @llvm.amdgcn.workitem.id.z() #0
 ; MESA: .long 47180
 ; MESA-NEXT: .long 132{{$}}

-; ALL-LABEL {{^}}test_workitem_id_x:
+; ALL-LABEL: {{^}}test_workitem_id_x:
 ; CO-V2: enable_vgpr_workitem_id = 0

 ; ALL-NOT: v0
@ -28,7 +28,7 @@ define amdgpu_kernel void @test_workitem_id_x(i32 addrspace(1)* %out) #1 {
 ; MESA: .long 47180
 ; MESA-NEXT: .long 2180{{$}}

-; ALL-LABEL {{^}}test_workitem_id_y:
+; ALL-LABEL: {{^}}test_workitem_id_y:
 ; CO-V2: enable_vgpr_workitem_id = 1

 ; ALL-NOT: v1
@ -43,7 +43,7 @@ define amdgpu_kernel void @test_workitem_id_y(i32 addrspace(1)* %out) #1 {
 ; MESA: .long 47180
 ; MESA-NEXT: .long 4228{{$}}

-; ALL-LABEL {{^}}test_workitem_id_z:
+; ALL-LABEL: {{^}}test_workitem_id_z:
 ; CO-V2: enable_vgpr_workitem_id = 2

 ; ALL-NOT: v2
--- a/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll
+++ b/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll
@ -95,14 +95,14 @@ define amdgpu_kernel void @fmuladd_f16_imm_a(

 ; VI-DENORM: s_movk_i32 [[KA:s[0-9]+]], 0x4200
 ; VI-DENORM: v_fma_f16 [[RESULT:v[0-9]+]], v[[A_F16]], [[KA]], v[[C_F16]]
-; VI-DENORM buffer_store_short [[RESULT]]
+; VI-DENORM: buffer_store_short [[RESULT]]

 ; GFX10-FLUSH: v_mul_f16_e32 [[MUL:v[0-9]+]], 0x4200, v[[A_F16]]
 ; GFX10-FLUSH: v_add_f16_e32 [[ADD:v[0-9]+]], [[MUL]], v[[C_F16]]
 ; GFX10-FLUSH: buffer_store_short [[ADD]]

 ; GFX10-DENORM: v_fmac_f16_e32 v[[C_F16]], 0x4200, v[[A_F16]]
-; GFX10-DENORM buffer_store_short v[[C_F16]],
+; GFX10-DENORM: buffer_store_short v[[C_F16]],

 ; GCN: s_endpgm
 define amdgpu_kernel void @fmuladd_f16_imm_b(
--- a/test/CodeGen/AMDGPU/llvm.rint.f64.ll
+++ b/test/CodeGen/AMDGPU/llvm.rint.f64.ll
@ -7,7 +7,7 @@

 ; SI-DAG: v_add_f64
 ; SI-DAG: v_add_f64
-; SI-DAG v_cmp_gt_f64_e64
+; SI-DAG: v_cmp_gt_f64_e64
 ; SI: v_cndmask_b32
 ; SI: v_cndmask_b32
 ; SI: s_endpgm
--- a/test/CodeGen/AMDGPU/merge-m0.mir
+++ b/test/CodeGen/AMDGPU/merge-m0.mir
@ -242,7 +242,7 @@ body:             |
 ...

 # GCN-LABEL: name: move-m0-with-prologue
-# GCN $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
+# GCN: $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
 # GCN: $m0 = S_MOV_B32 -1
 # GCN-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
 # GCN-NEXT: DS_GWS_INIT $vgpr0, 0, 1, implicit $m0, implicit $exec
--- a/test/CodeGen/AMDGPU/mubuf-legalize-operands.mir
+++ b/test/CodeGen/AMDGPU/mubuf-legalize-operands.mir
@ -28,7 +28,7 @@
 # W64: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, 0, 0, implicit $exec
 # W64: $exec = S_XOR_B64_term $exec, [[TMPEXEC]], implicit-def $scc
 # W64: S_CBRANCH_EXECNZ %bb.1, implicit $exec
-# W64-LABEL  bb.2:
+# W64-LABEL:  bb.2:
 # W64: $exec = S_MOV_B64 [[SAVEEXEC]]

 # W32-LABEL: name: idxen
@ -51,7 +51,7 @@
 # TODO: S_XOR_B32_term should be `implicit-def $scc`
 # W32: $exec_lo = S_XOR_B32_term $exec_lo, [[TMPEXEC]]
 # W32: S_CBRANCH_EXECNZ %bb.1, implicit $exec
-# W32-LABEL  bb.2:
+# W32-LABEL:  bb.2:
 # W32: $exec_lo = S_MOV_B32 [[SAVEEXEC]]
 ---
 name:            idxen
@ -97,7 +97,7 @@ body:             |
 # W64: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, 0, 0, implicit $exec
 # W64: $exec = S_XOR_B64_term $exec, [[TMPEXEC]], implicit-def $scc
 # W64: S_CBRANCH_EXECNZ %bb.1, implicit $exec
-# W64-LABEL  bb.2:
+# W64-LABEL:  bb.2:
 # W64: $exec = S_MOV_B64 [[SAVEEXEC]]

 # W32-LABEL: name: offen
@ -120,7 +120,7 @@ body:             |
 # TODO: S_XOR_B32_term should be `implicit-def $scc`
 # W32: $exec_lo = S_XOR_B32_term $exec_lo, [[TMPEXEC]]
 # W32: S_CBRANCH_EXECNZ %bb.1, implicit $exec
-# W32-LABEL  bb.2:
+# W32-LABEL:  bb.2:
 # W32: $exec_lo = S_MOV_B32 [[SAVEEXEC]]
 ---
 name:            offen
@ -166,7 +166,7 @@ body:             |
 # W64: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, 0, 0, implicit $exec
 # W64: $exec = S_XOR_B64_term $exec, [[TMPEXEC]], implicit-def $scc
 # W64: S_CBRANCH_EXECNZ %bb.1, implicit $exec
-# W64-LABEL  bb.2:
+# W64-LABEL:  bb.2:
 # W64: $exec = S_MOV_B64 [[SAVEEXEC]]

 # W32-LABEL: name: bothen
@ -189,7 +189,7 @@ body:             |
 # TODO: S_XOR_B32_term should be `implicit-def $scc`
 # W32: $exec_lo = S_XOR_B32_term $exec_lo, [[TMPEXEC]]
 # W32: S_CBRANCH_EXECNZ %bb.1, implicit $exec
-# W32-LABEL  bb.2:
+# W32-LABEL:  bb.2:
 # W32: $exec_lo = S_MOV_B32 [[SAVEEXEC]]
 ---
 name:            bothen
@ -272,7 +272,7 @@ body:             |
 # W64-NO-ADDR64: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFSET killed [[SRSRC]], 0, 0, 0, 0, 0, 0, 0, implicit $exec
 # W64-NO-ADDR64: $exec = S_XOR_B64_term $exec, [[TMPEXEC]], implicit-def $scc
 # W64-NO-ADDR64: S_CBRANCH_EXECNZ %bb.1, implicit $exec
-# W64-NO-ADDR64-LABEL  bb.2:
+# W64-NO-ADDR64-LABEL:  bb.2:
 # W64-NO-ADDR64: $exec = S_MOV_B64 [[SAVEEXEC]]

 # W32: successors: %bb.1({{.*}})
@ -293,7 +293,7 @@ body:             |
 # TODO: S_XOR_B32_term should be `implicit-def $scc`
 # W32: $exec_lo = S_XOR_B32_term $exec_lo, [[TMPEXEC]]
 # W32: S_CBRANCH_EXECNZ %bb.1, implicit $exec
-# W32-LABEL  bb.2:
+# W32-LABEL:  bb.2:
 # W32: $exec_lo = S_MOV_B32 [[SAVEEXEC]]

 # ADDR64: [[VRSRC:%[0-9]+]]:vreg_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3
--- a/test/CodeGen/AMDGPU/promote-alloca-no-opts.ll
+++ b/test/CodeGen/AMDGPU/promote-alloca-no-opts.ll
@ -3,7 +3,7 @@

 ; ALL-LABEL: {{^}}promote_alloca_i32_array_array:
 ; NOOPTS: workgroup_group_segment_byte_size = 0{{$}}
-; NOOPTS-NOT ds_write
+; NOOPTS-NOT: ds_write
 ; OPTS: ds_write
 define amdgpu_kernel void @promote_alloca_i32_array_array(i32 addrspace(1)* %out, i32 %index) #0 {
 entry:
@ -20,7 +20,7 @@ entry:

 ; ALL-LABEL: {{^}}optnone_promote_alloca_i32_array_array:
 ; ALL: workgroup_group_segment_byte_size = 0{{$}}
-; ALL-NOT ds_write
+; ALL-NOT: ds_write
 define amdgpu_kernel void @optnone_promote_alloca_i32_array_array(i32 addrspace(1)* %out, i32 %index) #1 {
 entry:
  %alloca = alloca [2 x [2 x i32]], addrspace(5)
--- a/test/CodeGen/AMDGPU/rewrite-out-arguments.ll
+++ b/test/CodeGen/AMDGPU/rewrite-out-arguments.ll
@ -325,7 +325,7 @@ define i32 @i32_one_out_arg_i32_1_use(i32* %val) #0 {
 ; CHECK-NEXT: %3 = call %unused_different_type @unused_different_type.body(i32* %0, float* undef)
 ; CHECK-NEXT: %4 = extractvalue %unused_different_type %3, 0
 ; CHECK-NEXT: store float %4, float* %1, align 4
-; CHECK-NEXT  ret void
+; CHECK-NEXT: ret void
 define void @unused_different_type(i32* %arg0, float* nocapture %arg1) #0 {
  store float 4.0, float* %arg1, align 4
  ret void
--- a/test/CodeGen/AMDGPU/salu-to-valu.ll
+++ b/test/CodeGen/AMDGPU/salu-to-valu.ll
@ -265,7 +265,7 @@ entry:

 ; GCN-LABEL: {{^}}smrd_valu2_max_smrd_offset:
 ; GCN-NOHSA: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:1020{{$}}
-; GCN-HSA flat_load_dword v{{[0-9]}}, v{{[0-9]+:[0-9]+}}
+; GCN-HSA: flat_load_dword v{{[0-9]}}, v[{{[0-9]+:[0-9]+}}]
 define amdgpu_kernel void @smrd_valu2_max_smrd_offset(i32 addrspace(1)* %out, [1024 x i32] addrspace(4)* %in) #1 {
 entry:
  %tmp = call i32 @llvm.amdgcn.workitem.id.x()
--- a/test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir
+++ b/test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir
@ -58,7 +58,7 @@ body: |
 ---

 # GCN-LABEL: name: dead_illegal_physreg_copy
-# GCN %2:vgpr_32 = COPY $vgpr0
+# GCN: %2:vgpr_32 = COPY $vgpr0
 # GCN: %1:sreg_32_xm0 = IMPLICIT_DEF
 # GCN: S_ENDPGM 0, implicit %2

--- a/test/CodeGen/AMDGPU/si-scheduler.ll
+++ b/test/CodeGen/AMDGPU/si-scheduler.ll
@ -65,7 +65,7 @@ attributes #2 = { nounwind readonly }


 ; CHECK-LABEL: amdgpu_ps_main:
-; CHECK s_buffer_load_dword
+; CHECK: s_buffer_load_dword
 define amdgpu_ps void @_amdgpu_ps_main(i32 %arg) local_unnamed_addr {
 .entry:
  %tmp = insertelement <2 x i32> zeroinitializer, i32 %arg, i32 0
--- a/test/CodeGen/AMDGPU/spill-vgpr-to-agpr.ll
+++ b/test/CodeGen/AMDGPU/spill-vgpr-to-agpr.ll
@ -12,7 +12,7 @@
 ; GFX900:     buffer_load_dword v{{[0-9]}},
 ; GFX900:     buffer_load_dword v{{[0-9]}},
 ; GFX908-NOT: buffer_
-; GFX908-DAG  v_accvgpr_read_b32 v{{[0-9]}}, a0
+; GFX908-DAG: v_accvgpr_read_b32 v{{[0-9]}}, a0
 ; GFX908-DAG: v_accvgpr_read_b32 v{{[0-9]}}, a1

 ; GCN:    NumVgprs: 10
@ -126,7 +126,7 @@ define amdgpu_kernel void @max_10_vgprs_used_9a(i32 addrspace(1)* %p) #0 {
 ; GCN-DAG:    buffer_store_dword v{{[0-9]}},
 ; GFX900:     buffer_load_dword v{{[0-9]}},
 ; GCN-DAG:    buffer_load_dword v{{[0-9]}},
-; GFX908-DAG  v_accvgpr_read_b32 v{{[0-9]}}, a1
+; GFX908-DAG: v_accvgpr_read_b32 v{{[0-9]}}, a1
 ; GFX908-DAG: v_accvgpr_read_b32 v{{[0-9]}}, a2
 ; GFX908-DAG: v_accvgpr_read_b32 v{{[0-9]}}, a3
 ; GFX908-DAG: v_accvgpr_read_b32 v{{[0-9]}}, a4
@ -192,7 +192,7 @@ define amdgpu_kernel void @max_10_vgprs_spill_v32(<32 x float> addrspace(1)* %p)
 ; GFX900:     buffer_store_dword v
 ; GFX900:     buffer_load_dword v
 ; GFX908-NOT: buffer_
-; GFX908-DAG  v_accvgpr_read_b32
+; GFX908-DAG: v_accvgpr_read_b32

 ; GCN:    NumVgprs: 256
 ; GFX900: ScratchSize: 148
--- a/test/CodeGen/AMDGPU/udivrem24.ll
+++ b/test/CodeGen/AMDGPU/udivrem24.ll
@ -122,7 +122,7 @@ define amdgpu_kernel void @udiv23_i32(i32 addrspace(1)* %out, i32 addrspace(1)*

 ; FUNC-LABEL: {{^}}udiv24_i32:
 ; SI: v_rcp_iflag
-; SI-NOT v_rcp_f32
+; SI-NOT: v_rcp_f32
 ; EG-NOT: RECIP_IEEE
 define amdgpu_kernel void @udiv24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
@ -139,7 +139,7 @@ define amdgpu_kernel void @udiv24_i32(i32 addrspace(1)* %out, i32 addrspace(1)*

 ; FUNC-LABEL: {{^}}no_udiv24_u23_u24_i32:
 ; SI: v_rcp_iflag
-; SI-NOT v_rcp_f32
+; SI-NOT: v_rcp_f32
 ; EG-NOT: RECIP_IEEE
 define amdgpu_kernel void @no_udiv24_u23_u24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
@ -156,7 +156,7 @@ define amdgpu_kernel void @no_udiv24_u23_u24_i32(i32 addrspace(1)* %out, i32 add

 ; FUNC-LABEL: {{^}}no_udiv24_u24_u23_i32:
 ; SI: v_rcp_iflag
-; SI-NOT v_rcp_f32
+; SI-NOT: v_rcp_f32
 ; EG-NOT: RECIP_IEEE
 define amdgpu_kernel void @no_udiv24_u24_u23_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
--- a/test/CodeGen/ARM/acle-intrinsics.ll
+++ b/test/CodeGen/ARM/acle-intrinsics.ll
@ -71,7 +71,7 @@ define i32 @pack_unpack(i32 %a, i32 %b) nounwind {

 define i32 @sel(i32 %a, i32 %b) nounwind {
 ; CHECK-LABEL: sel
-; CHECK sel r0, r0, r1
+; CHECK: sel r0, r0, r1
  %tmp = call i32 @llvm.arm.sel(i32 %a, i32 %b)
  ret i32 %tmp
 }
--- a/test/CodeGen/ARM/debug-segmented-stacks.ll
+++ b/test/CodeGen/ARM/debug-segmented-stacks.ll
@ -35,8 +35,8 @@ define void @test_basic() #0 !dbg !4 {

 ; ARM-linux:      pop     {r4, r5}
 ; ARM-linux:      .cfi_def_cfa_offset 0
-; ARM-linux       .cfi_same_value r4
-; ARM-linux       .cfi_same_value r5
+; ARM-linux:      .cfi_same_value r4
+; ARM-linux:      .cfi_same_value r5
 }

 !0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5 ", isOptimized: false, emissionKind: FullDebug, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
--- a/test/CodeGen/BPF/rodata_4.ll
+++ b/test/CodeGen/BPF/rodata_4.ll
@ -34,7 +34,7 @@ entry:
 ; CHECK:  *(u16 *)(r1 + 0) = r2
  ret i32 0
 }
-; CHECK  .section  .rodata,"a",@progbits
+; CHECK:  .section  .rodata,"a",@progbits

 ; Function Attrs: argmemonly nounwind
 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) #1
--- a/test/CodeGen/Hexagon/constant_compound.ll
+++ b/test/CodeGen/Hexagon/constant_compound.ll
@ -12,7 +12,7 @@
 ; rb = xor(rb, ra)

 ; CHECK: and(##65280,lsr(r
-; CHECK-NOT : ^= and
+; CHECK-NOT: ^= and

 define dso_local zeroext i16 @test_compound(i16 zeroext %varA, i16 zeroext %varB) local_unnamed_addr #0 {
 entry:
--- a/test/CodeGen/Hexagon/intrinsics/v65.ll
+++ b/test/CodeGen/Hexagon/intrinsics/v65.ll
@ -8,7 +8,7 @@ define i32 @A6_vcmpbeq_notany(i64 %a, i64 %b) {
  %c = call i32 @llvm.hexagon.A6.vcmpbeq.notany(i64 %a, i64 %b)
  ret i32 %c
 }
-; CHECK = !any8(vcmpb.eq(r1:0,r3:2))
+; CHECK: = !any8(vcmpb.eq(r1:0,r3:2))

 declare <16 x i32> @llvm.hexagon.V6.vabsb(<16 x i32>)
 define <16 x i32> @V6_vabsb(<16 x i32> %a) {
--- a/test/CodeGen/Mips/nmadd.ll
+++ b/test/CodeGen/Mips/nmadd.ll
@ -18,7 +18,7 @@ entry:
 ; CHECK-NM-64:             nmadd.s $f0, $f14, $f12, $f13
 ; CHECK-NM:                nmadd.s $f0, $f0, $f12, $f14
 ; CHECK-MM:                NMADD_S_MM
-; CHECK-NOT-NM-64          mul.s $f0, $f12, $f13
+; CHECK-NOT-NM-64:         mul.s $f0, $f12, $f13
 ; CHECK-NOT-NM-64:         neg.s $f0, $f0
 ; CHECK-NOT-NM:            mul.s $f0, $f12, $f14
 ; CHECK-NOT-NM:            neg.s $f0, $f0
@ -36,7 +36,7 @@ entry:
 ; CHECK-NM-64:             nmadd.d $f0, $f14, $f12, $f13
 ; CHECK-NM:                nmadd.d $f0, $f0, $f12, $f14
 ; CHECK-MM:                NMADD_D32_MM
-; CHECK-NOT-NM-64          mul.d $f0, $f12, $f13
+; CHECK-NOT-NM-64:         mul.d $f0, $f12, $f13
 ; CHECK-NOT-NM-64:         neg.d $f0, $f0
 ; CHECK-NOT-NM:            mul.d $f0, $f12, $f14
 ; CHECK-NOT-NM:            neg.d $f0, $f0
@ -54,7 +54,7 @@ entry:
 ; CHECK-NM-64:             nmsub.s $f0, $f14, $f12, $f13
 ; CHECK-NM:                nmsub.s $f0, $f0, $f12, $f14
 ; CHECK-MM:                NMSUB_S_MM
-; CHECK-NOT-NM-64          mul.s $f0, $f12, $f13
+; CHECK-NOT-NM-64:         mul.s $f0, $f12, $f13
 ; CHECK-NOT-NM-64:         neg.s $f0, $f0
 ; CHECK-NOT-NM:            mul.s $f0, $f12, $f14
 ; CHECK-NOT-NM:            neg.s $f0, $f0
@ -72,7 +72,7 @@ entry:
 ; CHECK-NM-64:             nmsub.d $f0, $f14, $f12, $f13
 ; CHECK-NM:                nmsub.d $f0, $f0, $f12, $f14
 ; CHECK-MM:                NMSUB_D32_MM
-; CHECK-NOT-NM-64          mul.d $f0, $f12, $f13
+; CHECK-NOT-NM-64:         mul.d $f0, $f12, $f13
 ; CHECK-NOT-NM-64:         neg.d $f0, $f0
 ; CHECK-NOT-NM:            mul.d $f0, $f12, $f14
 ; CHECK-NOT-NM:            neg.d $f0, $f0
--- a/test/CodeGen/NVPTX/atomics-sm60.ll
+++ b/test/CodeGen/NVPTX/atomics-sm60.ll
@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=nvptx -mcpu=sm_60 | FileCheck %s
 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_60 | FileCheck %s

-; CHECK-LABEL .func test(
+; CHECK-LABEL: .func test(
 define void @test(double* %dp0, double addrspace(1)* %dp1, double addrspace(3)* %dp3, double %d) {
 ; CHECK: atom.add.f64
  %r1 = call double @llvm.nvvm.atomic.load.add.f64.p0f64(double* %dp0, double %d)
@ -12,7 +12,7 @@ define void @test(double* %dp0, double addrspace(1)* %dp1, double addrspace(3)*
  ret void
 }

-; CHECK-LABEL .func test2(
+; CHECK-LABEL: .func test2(
 define void @test2(double* %dp0, double addrspace(1)* %dp1, double addrspace(3)* %dp3, double %d) {
 ; CHECK: atom.add.f64
  %r1 = atomicrmw fadd double* %dp0, double %d seq_cst
--- a/test/CodeGen/NVPTX/intrinsics.ll
+++ b/test/CodeGen/NVPTX/intrinsics.ll
@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s

-; CHECK-LABEL test_fabsf(
+; CHECK-LABEL: test_fabsf(
 define float @test_fabsf(float %f) {
 ; CHECK: abs.f32
  %x = call float @llvm.fabs.f32(float %f)
--- a/test/CodeGen/NVPTX/param-load-store.ll
+++ b/test/CodeGen/NVPTX/param-load-store.ll
@ -210,7 +210,7 @@ define <4 x i8> @test_v4i8(<4 x i8> %a) {
 ; CHECK-LABEL: test_v5i8(
 ; CHECK-NEXT: .param .align 8 .b8 test_v5i8_param_0[8]
 ; CHECK-DAG:  ld.param.u8     [[E4:%rs[0-9]+]], [test_v5i8_param_0+4];
-; CHECK-DAG   ld.param.v4.u8  {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v5i8_param_0]
+; CHECK-DAG:  ld.param.v4.u8  {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v5i8_param_0]
 ; CHECK:      .param .align 8 .b8 param0[8];
 ; CHECK-DAG:  st.param.v4.b8  [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]};
 ; CHECK-DAG:  st.param.b8     [param0+4], [[E4]];
@ -307,7 +307,7 @@ define <4 x i16> @test_v4i16(<4 x i16> %a) {
 ; CHECK-LABEL: test_v5i16(
 ; CHECK-NEXT: .param .align 16 .b8 test_v5i16_param_0[16]
 ; CHECK-DAG:  ld.param.u16    [[E4:%rs[0-9]+]], [test_v5i16_param_0+8];
-; CHECK-DAG   ld.param.v4.u16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v5i16_param_0]
+; CHECK-DAG:  ld.param.v4.u16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v5i16_param_0]
 ; CHECK:      .param .align 16 .b8 param0[16];
 ; CHECK-DAG:  st.param.v4.b16 [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]};
 ; CHECK-DAG:  st.param.b16    [param0+8], [[E4]];
@ -526,7 +526,7 @@ define <4 x i32> @test_v4i32(<4 x i32> %a) {
 ; CHECK-LABEL: test_v5i32(
 ; CHECK-NEXT: .param .align 32 .b8 test_v5i32_param_0[32]
 ; CHECK-DAG:  ld.param.u32     [[E4:%r[0-9]+]], [test_v5i32_param_0+16];
-; CHECK-DAG   ld.param.v4.u32  {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]], [[E2:%r[0-9]+]], [[E3:%r[0-9]+]]}, [test_v5i32_param_0]
+; CHECK-DAG:  ld.param.v4.u32  {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]], [[E2:%r[0-9]+]], [[E3:%r[0-9]+]]}, [test_v5i32_param_0]
 ; CHECK:      .param .align 32 .b8 param0[32];
 ; CHECK-DAG:  st.param.v4.b32  [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]};
 ; CHECK-DAG:  st.param.b32     [param0+16], [[E4]];
--- a/test/CodeGen/NVPTX/sqrt-approx.ll
+++ b/test/CodeGen/NVPTX/sqrt-approx.ll
@ -8,7 +8,7 @@ declare double @llvm.sqrt.f64(double)

 ; -- reciprocal sqrt --

-; CHECK-LABEL test_rsqrt32
+; CHECK-LABEL: test_rsqrt32
 define float @test_rsqrt32(float %a) #0 {
 ; CHECK: rsqrt.approx.f32
  %val = tail call float @llvm.sqrt.f32(float %a)
@ -16,7 +16,7 @@ define float @test_rsqrt32(float %a) #0 {
  ret float %ret
 }

-; CHECK-LABEL test_rsqrt_ftz
+; CHECK-LABEL: test_rsqrt_ftz
 define float @test_rsqrt_ftz(float %a) #0 #1 {
 ; CHECK: rsqrt.approx.ftz.f32
  %val = tail call float @llvm.sqrt.f32(float %a)
@ -24,7 +24,7 @@ define float @test_rsqrt_ftz(float %a) #0 #1 {
  ret float %ret
 }

-; CHECK-LABEL test_rsqrt64
+; CHECK-LABEL: test_rsqrt64
 define double @test_rsqrt64(double %a) #0 {
 ; CHECK: rsqrt.approx.f64
  %val = tail call double @llvm.sqrt.f64(double %a)
@ -32,7 +32,7 @@ define double @test_rsqrt64(double %a) #0 {
  ret double %ret
 }

-; CHECK-LABEL test_rsqrt64_ftz
+; CHECK-LABEL: test_rsqrt64_ftz
 define double @test_rsqrt64_ftz(double %a) #0 #1 {
 ; There's no rsqrt.approx.ftz.f64 instruction; we just use the non-ftz version.
 ; CHECK: rsqrt.approx.f64
@ -43,42 +43,42 @@ define double @test_rsqrt64_ftz(double %a) #0 #1 {

 ; -- sqrt --

-; CHECK-LABEL test_sqrt32
+; CHECK-LABEL: test_sqrt32
 define float @test_sqrt32(float %a) #0 {
 ; CHECK: sqrt.rn.f32
  %ret = tail call float @llvm.sqrt.f32(float %a)
  ret float %ret
 }

-; CHECK-LABEL test_sqrt32_ninf
+; CHECK-LABEL: test_sqrt32_ninf
 define float @test_sqrt32_ninf(float %a) #0 {
 ; CHECK: sqrt.approx.f32
  %ret = tail call ninf float @llvm.sqrt.f32(float %a)
  ret float %ret
 }

-; CHECK-LABEL test_sqrt_ftz
+; CHECK-LABEL: test_sqrt_ftz
 define float @test_sqrt_ftz(float %a) #0 #1 {
 ; CHECK: sqrt.rn.ftz.f32
  %ret = tail call float @llvm.sqrt.f32(float %a)
  ret float %ret
 }

-; CHECK-LABEL test_sqrt_ftz_ninf
+; CHECK-LABEL: test_sqrt_ftz_ninf
 define float @test_sqrt_ftz_ninf(float %a) #0 #1 {
 ; CHECK: sqrt.approx.ftz.f32
  %ret = tail call ninf float @llvm.sqrt.f32(float %a)
  ret float %ret
 }

-; CHECK-LABEL test_sqrt64
+; CHECK-LABEL: test_sqrt64
 define double @test_sqrt64(double %a) #0 {
 ; CHECK: sqrt.rn.f64
  %ret = tail call double @llvm.sqrt.f64(double %a)
  ret double %ret
 }

-; CHECK-LABEL test_sqrt64_ninf
+; CHECK-LABEL: test_sqrt64_ninf
 define double @test_sqrt64_ninf(double %a) #0 {
 ; There's no sqrt.approx.f64 instruction; we emit
 ; reciprocal(rsqrt.approx.f64(x)).  There's no non-ftz approximate reciprocal,
@ -89,14 +89,14 @@ define double @test_sqrt64_ninf(double %a) #0 {
  ret double %ret
 }

-; CHECK-LABEL test_sqrt64_ftz
+; CHECK-LABEL: test_sqrt64_ftz
 define double @test_sqrt64_ftz(double %a) #0 #1 {
 ; CHECK: sqrt.rn.f64
  %ret = tail call double @llvm.sqrt.f64(double %a)
  ret double %ret
 }

-; CHECK-LABEL test_sqrt64_ftz_ninf
+; CHECK-LABEL: test_sqrt64_ftz_ninf
 define double @test_sqrt64_ftz_ninf(double %a) #0 #1 {
 ; There's no sqrt.approx.ftz.f64 instruction; we just use the non-ftz version.
 ; CHECK: rsqrt.approx.f64
--- a/test/CodeGen/NVPTX/vector-loads.ll
+++ b/test/CodeGen/NVPTX/vector-loads.ll
@ -7,7 +7,7 @@
 ;
 ; which will load two floats at once into scalar registers.

-; CHECK-LABEL foo
+; CHECK-LABEL: foo
 define void @foo(<2 x float>* %a) {
 ; CHECK: ld.v2.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}}
  %t1 = load <2 x float>, <2 x float>* %a
@ -16,7 +16,7 @@ define void @foo(<2 x float>* %a) {
  ret void
 }

-; CHECK-LABEL foo2
+; CHECK-LABEL: foo2
 define void @foo2(<4 x float>* %a) {
 ; CHECK: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
  %t1 = load <4 x float>, <4 x float>* %a
@ -25,7 +25,7 @@ define void @foo2(<4 x float>* %a) {
  ret void
 }

-; CHECK-LABEL foo3
+; CHECK-LABEL: foo3
 define void @foo3(<8 x float>* %a) {
 ; CHECK: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
 ; CHECK-NEXT: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
@ -37,7 +37,7 @@ define void @foo3(<8 x float>* %a) {



-; CHECK-LABEL foo4
+; CHECK-LABEL: foo4
 define void @foo4(<2 x i32>* %a) {
 ; CHECK: ld.v2.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}}
  %t1 = load <2 x i32>, <2 x i32>* %a
@ -46,7 +46,7 @@ define void @foo4(<2 x i32>* %a) {
  ret void
 }

-; CHECK-LABEL foo5
+; CHECK-LABEL: foo5
 define void @foo5(<4 x i32>* %a) {
 ; CHECK: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}
  %t1 = load <4 x i32>, <4 x i32>* %a
@ -55,7 +55,7 @@ define void @foo5(<4 x i32>* %a) {
  ret void
 }

-; CHECK-LABEL foo6
+; CHECK-LABEL: foo6
 define void @foo6(<8 x i32>* %a) {
 ; CHECK: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}
 ; CHECK-NEXT: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}
@ -69,7 +69,7 @@ define void @foo6(<8 x i32>* %a) {
 ; computation was still too complex when LSV was called.
 declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() #0
 declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() #0
-; CHECK-LABEL foo_complex
+; CHECK-LABEL: foo_complex
 define void @foo_complex(i8* nocapture readonly align 16 dereferenceable(134217728) %alloc0) {
  %targ0.1.typed = bitcast i8* %alloc0 to [1024 x [131072 x i8]]*
  %t0 = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !1
--- a/test/CodeGen/PowerPC/convert-rr-to-ri-instrs-kill-flag.mir
+++ b/test/CodeGen/PowerPC/convert-rr-to-ri-instrs-kill-flag.mir
@ -3,7 +3,7 @@
 ---
 # LI + XFORM -> DFORM, no killed/dead flag fixup.
 name: testKillPassUpLI1
-#CHECK : name : testKillPassUpLI1
+#CHECK: name: testKillPassUpLI1
 tracksRegLiveness: true
 body: |
  bb.0.entry:
@ -19,7 +19,7 @@ body: |
 ---
 # LI + XFORM -> DFORM, fixup killed/dead flag for $x3, find no use, set def as
 # dead(LI8 is deleted).
-name : testKillPassUpLI2
+name: testKillPassUpLI2
 # CHECK: name: testKillPassUpLI2
 tracksRegLiveness: true
 body: |
--- a/test/CodeGen/PowerPC/f128-arith.ll
+++ b/test/CodeGen/PowerPC/f128-arith.ll
@ -71,7 +71,7 @@ entry:
 ; CHECK-LABEL: testLdNSt
 ; CHECK: lxvx
 ; CHECK: stxvx
-; CHECK-NEXT blr
+; CHECK-NEXT: blr
 }

 define void @qpSqrt(fp128* nocapture readonly %a, fp128* nocapture %res) {
--- a/test/CodeGen/PowerPC/fastcc_stacksize.ll
+++ b/test/CodeGen/PowerPC/fastcc_stacksize.ll
@ -44,7 +44,7 @@ entry:
                 i32 signext 7, i32 signext 8) ret void
  ret void

-; CHECK-LABEL : WithoutParamArea3
+; CHECK-LABEL: WithoutParamArea3
 ; CHECK: stdu 1, -32(1)
 ; CHECK: blr
 }
@ -63,7 +63,7 @@ entry:
  call fastcc void @PassByValue(%"myClass::Mem"* byval nonnull align 8 undef);
  ret void

-; CHECK-LABEL : PassByValue
+; CHECK-LABEL: PassByValue
 ; CHECK: stdu 1, -32(1)
 ; CHECK: blr
 }
@ -131,7 +131,7 @@ entry:
                                               byval nonnull align 8 undef);
  ret void

-; CHECK-LABEL : AggMemExprEmitter
+; CHECK-LABEL: AggMemExprEmitter
 ; CHECK: stdu 1, -144(1)
 ; CHECK: blr
 }
--- a/test/CodeGen/PowerPC/spe.ll
+++ b/test/CodeGen/PowerPC/spe.ll
@ -43,7 +43,6 @@ define float @test_fmul(float %a, float %b) {
  entry:
  %v = fmul float %a, %b
  ret float %v
-; CHECK-LABEL @test_fmul
 }

 define float @test_fadd(float %a, float %b) {
@ -54,7 +53,6 @@ define float @test_fadd(float %a, float %b) {
  entry:
  %v = fadd float %a, %b
  ret float %v
-; CHECK-LABEL @test_fadd
 }

 define float @test_fsub(float %a, float %b) {
@ -65,7 +63,6 @@ define float @test_fsub(float %a, float %b) {
  entry:
  %v = fsub float %a, %b
  ret float %v
-; CHECK-LABEL @test_fsub
 }

 define float @test_fneg(float %a) {
@ -76,8 +73,6 @@ define float @test_fneg(float %a) {
  entry:
  %v = fsub float -0.0, %a
  ret float %v
-
-; CHECK-LABEL @test_fneg
 }

 define float @test_dtos(double %a) {
--- a/test/CodeGen/PowerPC/umulo-128-legalisation-lowering.ll
+++ b/test/CodeGen/PowerPC/umulo-128-legalisation-lowering.ll
@ -3,7 +3,6 @@
 ; RUN: llc < %s -mtriple=powerpc-unknown-linux-gnu | FileCheck %s --check-prefixes=PPC32

 define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
-; PPC64-LABEL muloti_test:
 ; PPC64-LABEL: muloti_test:
 ; PPC64:       # %bb.0: # %start
 ; PPC64-NEXT:    mulld 8, 5, 4
@ -151,7 +150,6 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
 ; PPC32-NEXT:    addi 1, 1, 80
 ; PPC32-NEXT:    mtlr 0
 ; PPC32-NEXT:    blr
-; PPC32-LABEL muloti_test:
 start:
  %0 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %l, i128 %r) #2
  %1 = extractvalue { i128, i1 } %0, 0
--- a/test/CodeGen/PowerPC/xray-ret-is-terminator.ll
+++ b/test/CodeGen/PowerPC/xray-ret-is-terminator.ll
@ -1,7 +1,7 @@
 ; RUN: llc -verify-machineinstrs -o - -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s

 define void @ILLBeBack() #0 {
-; CHECK-LABEL @ILLBeBack
+; CHECK-LABEL: @ILLBeBack
 ; CHECK: bne {{[0-9]+}}, [[LABEL:\.[a-zA-Z0-9_]+]]
 ; CHECK: [[LABEL]]:
 ; CHECK: bl __xray_FunctionExit
--- a/test/CodeGen/Thumb2/float-ops.ll
+++ b/test/CodeGen/Thumb2/float-ops.ll
@ -277,7 +277,7 @@ define float @select_f(float %a, float %b, i1 %c) {
 define double @select_d(double %a, double %b, i1 %c) {
 ; CHECK-LABEL: select_d:
 ; NONE: ldr{{(.w)?}}     [[REG:r[0-9]+]], [sp]
-; NONE  ands    [[REG]], [[REG]], #1
+; NONE: ands    [[REG]], [[REG]], #1
 ; NONE-DAG: moveq   r0, r2
 ; NONE-DAG: moveq   r1, r3
 ; SP: ands r0, r0, #1
--- a/test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll
+++ b/test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll
@ -315,7 +315,6 @@ entry:
 }

 define i32 * @test_memop_i32(i32 * %p1) {
-;X64    liveins: $rdi
  ; X32-LABEL: name: test_memop_i32
  ; X32: bb.1 (%ir-block.0):
  ; X32:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
--- a/test/CodeGen/X86/avx-vzeroupper.ll
+++ b/test/CodeGen/X86/avx-vzeroupper.ll
@ -83,7 +83,6 @@ define <8 x float> @test01(<4 x float> %a, <4 x float> %b, <8 x float> %c) nounw
 ; BTVER2-NEXT:    vmovups (%rsp), %ymm0 # 32-byte Reload
 ; BTVER2-NEXT:    addq $56, %rsp
 ; BTVER2-NEXT:    retq
-; DISABLE-VZ       # %bb.0:
  %tmp = load <4 x float>, <4 x float>* @x, align 16
  %call = tail call <4 x float> @do_sse(<4 x float> %tmp) nounwind
  store <4 x float> %call, <4 x float>* @x, align 16
--- a/test/CodeGen/X86/inline-0bh.ll
+++ b/test/CodeGen/X86/inline-0bh.ll
@ -6,7 +6,7 @@ define i32 @PR31007() {
 ; CHECK-LABEL: PR31007:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:  #APP
-; CHECK   :    addb $11, %al
+; CHECK:       addb $11, %al
 ; CHECK:       #NO_APP
 ; CHECK-NEXT:  xorl %eax, %eax
 ; CHECK-NEXT:  retq
--- a/test/CodeGen/X86/scavenger.mir
+++ b/test/CodeGen/X86/scavenger.mir
@ -44,6 +44,6 @@ name: func3
 tracksRegLiveness: true
 body: |
  bb.0:
-    ; CHECK dead {{\$e[a-z]+}} = MOV32ri 42
+    ; CHECK: dead {{\$e[a-z]+}} = MOV32ri 42
    dead %0 : gr32 = MOV32ri 42
 ...
--- a/test/DebugInfo/MIR/X86/live-debug-values-fragments.mir
+++ b/test/DebugInfo/MIR/X86/live-debug-values-fragments.mir
@ -42,12 +42,12 @@
 # CHECK-NEXT: MOV32rr
 # CHECK-NEXT: ADD32ri8
 # CHECK-NEXT: DBG_VALUE $ebx, $noreg, !{{[0-9]+}},
-# CHECK-SAME                  !DIExpression(DW_OP_LLVM_fragment, 32, 32)
+# CHECK-SAME:                 !DIExpression(DW_OP_LLVM_fragment, 32, 32)
 # CHECK-NEXT: JMP_1

 # CHECK-LABEL: bb.3.bb3:
 # CHECK:      DBG_VALUE $ebx, $noreg, !{{[0-9]+}},
-# CHECK-SAME                  !DIExpression(DW_OP_LLVM_fragment, 32, 32)
+# CHECK-SAME:                 !DIExpression(DW_OP_LLVM_fragment, 32, 32)
 # CHECK-NEXT: DBG_VALUE $ax, $noreg, !{{[0-9]+}},
 # CHECK-SAME:                 !DIExpression(DW_OP_LLVM_fragment, 8, 16)
 # CHECK-NEXT: XOR32rr
--- a/test/DebugInfo/X86/objc-property-void.ll
+++ b/test/DebugInfo/X86/objc-property-void.ll
@ -4,7 +4,7 @@
 ; CHECK: DW_TAG_structure_type
 ; CHECK:  DW_AT_APPLE_objc_complete_type
 ; CHECK:  DW_AT_name [DW_FORM_strp] ( .debug_str[0x{{[0-9a-fA-F]+}}] = "Foo")
-; CHECK   DW_AT_APPLE_runtime_class [DW_FORM_data1]       (DW_LANG_ObjC)
+; CHECK:  DW_AT_APPLE_runtime_class [DW_FORM_data1]       (DW_LANG_ObjC)
 ; CHECK: DW_TAG_APPLE_property
 ; CHECK:  DW_AT_APPLE_property_name [DW_FORM_strp] ( .debug_str[0x{{[0-9a-fA-F]+}}] = "foo")

--- a/test/Instrumentation/AddressSanitizer/basic-msvc64.ll
+++ b/test/Instrumentation/AddressSanitizer/basic-msvc64.ll
@ -13,7 +13,7 @@ define i32 @test_load(i32* %a) sanitize_address {
 ; CHECK-NEXT: %[[SHADOW:[^ ]*]] = load i64, i64* @__asan_shadow_memory_dynamic_address

 ; Shadow address is loaded and added into the whole offset computation.
-; CHECK add i64 %{{.*}}, %[[SHADOW] ]
+; CHECK: add i64 %{{.*}}, %[[SHADOW]]

 entry:
  %tmp1 = load i32, i32* %a, align 4
--- a/test/Instrumentation/AddressSanitizer/force-dynamic-shadow.ll
+++ b/test/Instrumentation/AddressSanitizer/force-dynamic-shadow.ll
@ -14,7 +14,7 @@ define i32 @test_load(i32* %a) sanitize_address {
 ; CHECK-NDS-NOT: __asan_shadow_memory_dynamic_address

 ; Shadow address is loaded and added into the whole offset computation.
-; CHECK-FDS add i64 %{{.*}}, %[[SHADOW] ]
+; CHECK-FDS: add i64 %{{.*}}, %[[SHADOW]]

 entry:
  %tmp1 = load i32, i32* %a, align 4
--- a/test/Instrumentation/MemorySanitizer/msan_basic.ll
+++ b/test/Instrumentation/MemorySanitizer/msan_basic.ll
@ -953,7 +953,7 @@ entry:

 attributes #0 = { "target-features"="+fxsr,+x87,-sse" }

-; CHECK call void @llvm.memcpy.p0i8.p0i8.i64{{.*}}@__msan_va_arg_tls {{.*}}, i64 48
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64{{.*}}@__msan_va_arg_tls {{.*}}, i64 48

 declare i32 @InnerTailCall(i32 %a)

--- a/test/Instrumentation/SanitizerCoverage/const-cmp-tracing.ll
+++ b/test/Instrumentation/SanitizerCoverage/const-cmp-tracing.ll
@ -25,7 +25,7 @@ entry:
 ; compare (const, const) - should not be instrumented
  icmp slt i32 1, 0
 ; CHECK-NOT: call void @__sanitizer_cov_trace
-; CHECK icmp slt i32 1, 0
+; CHECK: icmp slt i32 1, 0

 ; compare variables of byte size
  %x = trunc i32 %a to i8
--- a/test/MC/AArch64/tme-error.s
+++ b/test/MC/AArch64/tme-error.s
@ -37,10 +37,10 @@ tcommit  sp

 tcancel
 // CHECK: error: too few operands for instruction
-// CHECK-NEXT tcancel
+// CHECK-NEXT: tcancel
 tcancel x0
 // CHECK: error: immediate must be an integer in range [0, 65535]
-// CHECK-NEXT tcancel
+// CHECK-NEXT: tcancel
 tcancel #65536
 // CHECK: error: immediate must be an integer in range [0, 65535]
 // CHECK-NEXT: tcancel #65536
--- a/test/MC/Disassembler/ARM/vstrldr_sys.txt
+++ b/test/MC/Disassembler/ARM/vstrldr_sys.txt
@ -92,7 +92,7 @@
 # CHECK: vldrhi fpscr, [r0] @ encoding: [0x90,0xed,0x80,0x2f]
 [0x90,0xed,0x80,0x2f]

-# ERROR-NOSEC invalid instruction encoding
+# ERROR-NOSEC: invalid instruction encoding
 # CHECK-NOMVE: vstr fpcxts, [r12, #508] @ encoding: [0xcc,0xed,0xff,0xef]
 # CHECK-NOVFP: vstr fpcxts, [r12, #508] @ encoding: [0xcc,0xed,0xff,0xef]
 # CHECK: vstr fpcxts, [r12, #508] @ encoding: [0xcc,0xed,0xff,0xef]
@ -213,85 +213,85 @@
 [0x7d,0xec,0x86,0xcf]

 # CHECK-NOSEC: vstr vpr, [r6, #500] @ encoding: [0xc6,0xed,0xfd,0x8f]
-# ERROR-NOMVE invalid instruction encoding
+# ERROR-NOMVE: invalid instruction encoding
 # CHECK-NOVFP: vstr vpr, [r6, #500] @ encoding: [0xc6,0xed,0xfd,0x8f]
 # CHECK: vstr vpr, [r6, #500] @ encoding: [0xc6,0xed,0xfd,0x8f]
 [0xc6,0xed,0xfd,0x8f]

 # CHECK-NOSEC: vstr p0, [lr, #-508] @ encoding: [0x4e,0xed,0xff,0xaf]
-# ERROR-NOMVE invalid instruction encoding
+# ERROR-NOMVE: invalid instruction encoding
 # CHECK-NOVFP: vstr p0, [lr, #-508] @ encoding: [0x4e,0xed,0xff,0xaf]
 # CHECK: vstr p0, [lr, #-508] @ encoding: [0x4e,0xed,0xff,0xaf]
 [0x4e,0xed,0xff,0xaf]

 # CHECK-NOSEC: vstr vpr, [r6, #500]! @ encoding: [0xe6,0xed,0xfd,0x8f]
-# ERROR-NOMVE invalid instruction encoding
+# ERROR-NOMVE: invalid instruction encoding
 # CHECK-NOVFP: vstr vpr, [r6, #500]! @ encoding: [0xe6,0xed,0xfd,0x8f]
 # CHECK: vstr vpr, [r6, #500]! @ encoding: [0xe6,0xed,0xfd,0x8f]
 [0xe6,0xed,0xfd,0x8f]

 # CHECK-NOSEC: vstr p0, [lr, #-508]! @ encoding: [0x6e,0xed,0xff,0xaf]
-# ERROR-NOMVE invalid instruction encoding
+# ERROR-NOMVE: invalid instruction encoding
 # CHECK-NOVFP: vstr p0, [lr, #-508]! @ encoding: [0x6e,0xed,0xff,0xaf]
 # CHECK: vstr p0, [lr, #-508]! @ encoding: [0x6e,0xed,0xff,0xaf]
 [0x6e,0xed,0xff,0xaf]

 # CHECK-NOSEC: vstr vpr, [r6], #500 @ encoding: [0xe6,0xec,0xfd,0x8f]
-# ERROR-NOMVE invalid instruction encoding
+# ERROR-NOMVE: invalid instruction encoding
 # CHECK-NOVFP: vstr vpr, [r6], #500 @ encoding: [0xe6,0xec,0xfd,0x8f]
 # CHECK: vstr vpr, [r6], #500 @ encoding: [0xe6,0xec,0xfd,0x8f]
 [0xe6,0xec,0xfd,0x8f]

 # CHECK-NOSEC: vstr p0, [lr], #-508 @ encoding: [0x6e,0xec,0xff,0xaf]
-# ERROR-NOMVE invalid instruction encoding
+# ERROR-NOMVE: invalid instruction encoding
 # CHECK-NOVFP: vstr p0, [lr], #-508 @ encoding: [0x6e,0xec,0xff,0xaf]
 # CHECK: vstr p0, [lr], #-508 @ encoding: [0x6e,0xec,0xff,0xaf]
 [0x6e,0xec,0xff,0xaf]

 # CHECK-NOSEC: vstr p0, [sp], #-24 @ encoding: [0x6d,0xec,0x86,0xaf]
-# ERROR-NOMVE invalid instruction encoding
+# ERROR-NOMVE: invalid instruction encoding
 # CHECK-NOVFP: vstr p0, [sp], #-24 @ encoding: [0x6d,0xec,0x86,0xaf]
 # CHECK: vstr p0, [sp], #-24 @ encoding: [0x6d,0xec,0x86,0xaf]
 [0x6d,0xec,0x86,0xaf]

 # CHECK-NOSEC: vldr vpr, [r6, #500] @ encoding: [0xd6,0xed,0xfd,0x8f]
-# ERROR-NOMVE invalid instruction encoding
+# ERROR-NOMVE: invalid instruction encoding
 # CHECK-NOVFP: vldr vpr, [r6, #500] @ encoding: [0xd6,0xed,0xfd,0x8f]
 # CHECK: vldr vpr, [r6, #500] @ encoding: [0xd6,0xed,0xfd,0x8f]
 [0xd6,0xed,0xfd,0x8f]

 # CHECK-NOSEC: vldr p0, [lr, #-508] @ encoding: [0x5e,0xed,0xff,0xaf]
-# ERROR-NOMVE invalid instruction encoding
+# ERROR-NOMVE: invalid instruction encoding
 # CHECK-NOVFP: vldr p0, [lr, #-508] @ encoding: [0x5e,0xed,0xff,0xaf]
 # CHECK: vldr p0, [lr, #-508] @ encoding: [0x5e,0xed,0xff,0xaf]
 [0x5e,0xed,0xff,0xaf]

 # CHECK-NOSEC: vldr vpr, [r6, #500]! @ encoding: [0xf6,0xed,0xfd,0x8f]
-# ERROR-NOMVE invalid instruction encoding
+# ERROR-NOMVE: invalid instruction encoding
 # CHECK-NOVFP: vldr vpr, [r6, #500]! @ encoding: [0xf6,0xed,0xfd,0x8f]
 # CHECK: vldr vpr, [r6, #500]! @ encoding: [0xf6,0xed,0xfd,0x8f]
 [0xf6,0xed,0xfd,0x8f]

 # CHECK-NOSEC: vldr p0, [lr, #-508]! @ encoding: [0x7e,0xed,0xff,0xaf]
-# ERROR-NOMVE invalid instruction encoding
+# ERROR-NOMVE: invalid instruction encoding
 # CHECK-NOVFP: vldr p0, [lr, #-508]! @ encoding: [0x7e,0xed,0xff,0xaf]
 # CHECK: vldr p0, [lr, #-508]! @ encoding: [0x7e,0xed,0xff,0xaf]
 [0x7e,0xed,0xff,0xaf]

 # CHECK-NOSEC: vldr vpr, [r6], #500 @ encoding: [0xf6,0xec,0xfd,0x8f]
-# ERROR-NOMVE invalid instruction encoding
+# ERROR-NOMVE: invalid instruction encoding
 # CHECK-NOVFP: vldr vpr, [r6], #500 @ encoding: [0xf6,0xec,0xfd,0x8f]
 # CHECK: vldr vpr, [r6], #500 @ encoding: [0xf6,0xec,0xfd,0x8f]
 [0xf6,0xec,0xfd,0x8f]

 # CHECK-NOSEC: vldr p0, [lr], #-508 @ encoding: [0x7e,0xec,0xff,0xaf]
-# ERROR-NOMVE invalid instruction encoding
+# ERROR-NOMVE: invalid instruction encoding
 # CHECK-NOVFP: vldr p0, [lr], #-508 @ encoding: [0x7e,0xec,0xff,0xaf]
 # CHECK: vldr p0, [lr], #-508 @ encoding: [0x7e,0xec,0xff,0xaf]
 [0x7e,0xec,0xff,0xaf]

 # CHECK-NOSEC: vldr p0, [sp], #-24 @ encoding: [0x7d,0xec,0x86,0xaf]
-# ERROR-NOMVE invalid instruction encoding
+# ERROR-NOMVE: invalid instruction encoding
 # CHECK-NOVFP: vldr p0, [sp], #-24 @ encoding: [0x7d,0xec,0x86,0xaf]
 # CHECK: vldr p0, [sp], #-24 @ encoding: [0x7d,0xec,0x86,0xaf]
 [0x7d,0xec,0x86,0xaf]
--- a/test/MC/Disassembler/PowerPC/ppc64-encoding-vmx.txt
+++ b/test/MC/Disassembler/PowerPC/ppc64-encoding-vmx.txt
@ -841,10 +841,10 @@
 #CHECK: vabsdub 2, 3, 4
 0x10 0x43 0x24 0x03

-#CHECK vabsduh 2, 3, 4
+#CHECK: vabsduh 2, 3, 4
 0x10 0x43 0x24 0x43

-#CHECK vabsduw 2, 3, 4
+#CHECK: vabsduw 2, 3, 4
 0x10 0x43 0x24 0x83

 # CHECK: bcdcfn. 27, 31, 1
--- a/test/MC/Mips/mul-macro-variants.s
+++ b/test/MC/Mips/mul-macro-variants.s
@ -117,7 +117,7 @@ text_label:
 dmul $4, $5, $6
 # CHECK:        dmultu  $5, $6                  # encoding: [0x00,0xa6,0x00,0x1d]
 # CHECK:        mflo    $4                      # encoding: [0x00,0x00,0x20,0x12]
-# CHECK-TRAP    dmultu  $5, $6                  # encoding: [0x00,0xa6,0x00,0x1d]
+# CHECK-TRAP:   dmultu  $5, $6                  # encoding: [0x00,0xa6,0x00,0x1d]
 # CHECK-TRAP:   mflo    $4                      # encoding: [0x00,0x00,0x20,0x12]
 dmul $4, $5, 1
 # CHECK:        addiu   $1, $zero, 1            # encoding: [0x24,0x01,0x00,0x01]
--- a/test/ObjectYAML/MachO/fat_macho_i386_x86_64.yaml
+++ b/test/ObjectYAML/MachO/fat_macho_i386_x86_64.yaml
@ -53,7 +53,7 @@ Slices:
 #CHECK:     align:           12
 #CHECK: Slices:          
 #CHECK:   - !mach-o
-#CHECK      FileHeader:      
+#CHECK:     FileHeader:      
 #CHECK:       magic:           0xFEEDFACE
 #CHECK:       cputype:         0x00000007
 #CHECK:       cpusubtype:      0x00000003
@ -62,7 +62,7 @@ Slices:
 #CHECK:       sizeofcmds:      0
 #CHECK:       flags:           0x01218085
 #CHECK:   - !mach-o
-#CHECK      FileHeader:        
+#CHECK:     FileHeader:        
 #CHECK:       magic:           0xFEEDFACF
 #CHECK:       cputype:         0x01000007
 #CHECK:       cpusubtype:      0x80000003
--- a/test/Transforms/AtomicExpand/SPARC/libcalls.ll
+++ b/test/Transforms/AtomicExpand/SPARC/libcalls.ll
@ -86,7 +86,7 @@ define i128 @test_load_i128(i128* %arg) {
  ret i128 %ret
 }

-; CHECK-LABEL @test_store_i128(
+; CHECK-LABEL: @test_store_i128(
 ; CHECK:  %1 = bitcast i128* %arg to i8*
 ; CHECK:  %2 = alloca i128, align 8
 ; CHECK:  %3 = bitcast i128* %2 to i8*
--- a/test/Transforms/Attributor/nofree.ll
+++ b/test/Transforms/Attributor/nofree.ll
@ -51,7 +51,7 @@ define void @only_free(i8* nocapture %0) local_unnamed_addr #0 {

 ; ATTRIBUTOR: Function Attrs: noinline nounwind uwtable
 ; ATTRIBUTOR-NOT: nofree
-; ATTRIBUTOR-NEXT :define void @free_in_scc1(i8* nocapture %0) local_unnamed_addr
+; ATTRIBUTOR-NEXT: define void @free_in_scc1(i8* nocapture %0) local_unnamed_addr
 define void @free_in_scc1(i8* nocapture %0) local_unnamed_addr #0 {
  tail call void @free_in_scc2(i8* %0) #1
  ret void
--- a/test/Transforms/CallSiteSplitting/callsite-instructions-before-call.ll
+++ b/test/Transforms/CallSiteSplitting/callsite-instructions-before-call.ll
@ -201,7 +201,7 @@ End:
 ; CHECK-LABEL: TBB.split:
 ; CHECK-NEXT: store i32 %v, i32* %a
 ; CHECK-NEXT: %[[CALL2:.*]] = call i32 @callee(i32* nonnull %a, i32 1, i32 2)
-; CHECK-NEXT br label %Tail
+; CHECK-NEXT: br label %Tail
 ; CHECK-LABEL: Tail:
 ; CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Header.split ], [ %[[CALL2]], %TBB.split ]
 ; CHECK: ret i32 %[[MERGED]]
@ -239,7 +239,7 @@ End:                                           ; preds = %CallSite, %TBB
 ; CHECK-NEXT: br label %CallSite
 ; CHECK-LABEL: TBB.split:
 ; CHECK: call void @bar(i32* nonnull %ptrarg, i32 %l2)
-; CHECK-NEXT br label %CallSite
+; CHECK-NEXT: br label %CallSite
 ; CHECK-LABEL: CallSite:
 ; CHECK-NEXT:  %l = phi i32 [ %l1, %Header.split ], [ %l2, %TBB.split ]
 ; CHECK: call void @bari(i32 %l)
--- a/test/Transforms/CodeExtractor/PartialInlineLiveAcross.ll
+++ b/test/Transforms/CodeExtractor/PartialInlineLiveAcross.ll
@ -38,7 +38,7 @@ define i32 @dummy_caller(i32 %arg) local_unnamed_addr #0 {
 ; CHECK: codeRepl.i:
 ; CHECK:  call void @test.1.bb2()
 ; CHECK-NOT: load
-; CHECK  br
+; CHECK:  br

 bb:
  %tmp = tail call i32 @test(i32 %arg)
--- a/test/Transforms/CodeExtractor/PartialInlineNoLiveOut.ll
+++ b/test/Transforms/CodeExtractor/PartialInlineNoLiveOut.ll
@ -41,7 +41,7 @@ define i32 @dummy_caller(i32 %arg) local_unnamed_addr #0 {
 ; CHECK: codeRepl.i:
 ; CHECK:  call void @test.1.bb2()
 ; CHECK-NOT: load
-; CHECK  br
+; CHECK:  br
 bb:
  %tmp = tail call i32 @test(i32 %arg)
  ret i32 %tmp
--- a/test/Transforms/EntryExitInstrumenter/mcount.ll
+++ b/test/Transforms/EntryExitInstrumenter/mcount.ll
@ -30,18 +30,18 @@ entry:
 ; CHECK: entry:
 ; CHECK-NEXT: call void @mcount()

-; CHECK-NEXT %0 = call i8* @llvm.returnaddress(i32 0)
-; CHECK-NEXT call void @__cyg_profile_func_enter(i8* bitcast (void ()* @root_function to i8*), i8* %0)
+; CHECK-NEXT: %0 = call i8* @llvm.returnaddress(i32 0)
+; CHECK-NEXT: call void @__cyg_profile_func_enter(i8* bitcast (void ()* @root_function to i8*), i8* %0)

 ; Entry and exit calls, inlined from @leaf_function()
-; CHECK-NEXT %1 = call i8* @llvm.returnaddress(i32 0)
-; CHECK-NEXT call void @__cyg_profile_func_enter(i8* bitcast (void ()* @leaf_function to i8*), i8* %1)
-; CHECK-NEXT %2 = call i8* @llvm.returnaddress(i32 0)
-; CHECK-NEXT call void @__cyg_profile_func_exit(i8* bitcast (void ()* @leaf_function to i8*), i8* %2)
-; CHECK-NEXT %3 = call i8* @llvm.returnaddress(i32 0)
+; CHECK-NEXT: %1 = call i8* @llvm.returnaddress(i32 0)
+; CHECK-NEXT: call void @__cyg_profile_func_enter(i8* bitcast (void ()* @leaf_function to i8*), i8* %1)
+; CHECK-NEXT: %2 = call i8* @llvm.returnaddress(i32 0)
+; CHECK-NEXT: call void @__cyg_profile_func_exit(i8* bitcast (void ()* @leaf_function to i8*), i8* %2)
+; CHECK-NEXT: %3 = call i8* @llvm.returnaddress(i32 0)

-; CHECK-NEXT call void @__cyg_profile_func_exit(i8* bitcast (void ()* @root_function to i8*), i8* %3)
-; CHECK-NEXT ret void
+; CHECK-NEXT: call void @__cyg_profile_func_exit(i8* bitcast (void ()* @root_function to i8*), i8* %3)
+; CHECK-NEXT: ret void
 }


--- a/test/Transforms/GuardWidening/loop-schedule.ll
+++ b/test/Transforms/GuardWidening/loop-schedule.ll
@ -11,7 +11,7 @@
 declare void @llvm.experimental.guard(i1,...)

 define void @iter(i32 %a, i32 %b, i1* %c_p) {
-; CHECK-LABEL @iter
+; CHECK-LABEL: @iter
 ; CHECK:  %cond_0 = icmp ult i32 %a, 10
 ; CHECK:  %cond_1 = icmp ult i32 %b, 10
 ; CHECK:  %wide.chk = and i1 %cond_0, %cond_1
@ -37,7 +37,7 @@ leave:                                            ; preds = %leave.loopexit, %en
 }

 define void @within_loop(i32 %a, i32 %b, i1* %c_p) {
-; CHECK-LABEL @within_loop
+; CHECK-LABEL: @within_loop
 ; CHECK:  %cond_0 = icmp ult i32 %a, 10
 ; CHECK:  %cond_1 = icmp ult i32 %b, 10
 ; CHECK:  %wide.chk = and i1 %cond_0, %cond_1
--- a/test/Transforms/InferFunctionAttrs/norecurse_debug.ll
+++ b/test/Transforms/InferFunctionAttrs/norecurse_debug.ll
@ -53,4 +53,4 @@ attributes #1 = { nounwind readnone speculatable }
 !29 = !DILocation(line: 10, column: 1, scope: !2)

 ; CHECK: attributes #0 = { nofree norecurse nounwind }
-; CHECK-NOT foo.coefficient1
+; CHECK-NOT: foo.coefficient1
--- a/test/Transforms/InstCombine/atomic.ll
+++ b/test/Transforms/InstCombine/atomic.ll
@ -314,7 +314,7 @@ define void @pr27490b(i8** %p1, i8** %p2) {
 ;; not representable in the IR.  This was pr29121.  The right long term
 ;; solution is to extend the IR to handle this case.
 define <2 x float> @no_atomic_vector_load(i64* %p) {
-; CHECK-LABEL @no_atomic_vector_load
+; CHECK-LABEL: @no_atomic_vector_load
 ; CHECK: load atomic i64, i64* %p unordered, align 8
  %load = load atomic i64, i64* %p unordered, align 8
  %.cast = bitcast i64 %load to <2 x float>
--- a/test/Transforms/InstCombine/debuginfo-dce2.ll
+++ b/test/Transforms/InstCombine/debuginfo-dce2.ll
@ -31,7 +31,7 @@ entry:
 ; CHECK-NOT: bitcast
 ; CHECK: call void @llvm.dbg.value(metadata i8* %p, metadata ![[Q_VAR:[0-9]+]], metadata !DIExpression())
 ; CHECK-NOT: bitcast
-; CHECK ret void
+; CHECK: ret void

 ; CHECK: ![[P_VAR]] = !DILocalVariable(name: "p", {{.*}})
 ; CHECK: ![[Q_VAR]] = !DILocalVariable(name: "q", {{.*}})
--- a/test/Transforms/InstCombine/intptr1.ll
+++ b/test/Transforms/InstCombine/intptr1.ll
@ -27,7 +27,7 @@ for.body:                                         ; preds = %for.body, %for.body
  store float %mul.i, float* %a.addr.03, align 4
  %add = getelementptr inbounds float, float* %tmp, i64 1
  %add.int = ptrtoint float* %add to i64
-; CHECK %add = getelementptr
+; CHECK: %add = getelementptr
 ; CHECK-NOT: ptrtoint float*
  %incdec.ptr = getelementptr inbounds float, float* %a.addr.03, i64 1
 ; CHECK: %incdec.ptr = 
--- a/test/Transforms/LoopUnroll/peel-loop-negative.ll
+++ b/test/Transforms/LoopUnroll/peel-loop-negative.ll
@ -4,7 +4,7 @@
 ; to zero.
 define i32 @invariant_backedge_neg_1(i32 %a, i32 %b) {
 ; CHECK-LABEL: @invariant_backedge_neg_1
-; CHECK-NOT    loop.peel{{.*}}:
+; CHECK-NOT:   loop.peel{{.*}}:
 ; CHECK:       loop:
 ; CHECK:         %i = phi
 ; CHECK:         %sum = phi
--- a/test/Transforms/LoopVectorize/ARM/prefer-tail-loop-folding.ll
+++ b/test/Transforms/LoopVectorize/ARM/prefer-tail-loop-folding.ll
@ -200,9 +200,9 @@ define void @narrowing_load_not_allowed(i8* noalias nocapture %A, i8* noalias no
 ; PREFER-FOLDING:     br i1 %{{.*}}, label %{{.*}}, label %vector.body

 ; FOLDING-OPT:        vector.body:
-; FOLDING-OPT         call <8 x i16> @llvm.masked.load.v8i16.p0v8i16
-; FOLDING-OPT         call <8 x i8> @llvm.masked.load.v8i8.p0v8i8
-; FOLDING-OPT         call void @llvm.masked.store.v8i8.p0v8i8
+; FOLDING-OPT:        call <8 x i16> @llvm.masked.load.v8i16.p0v8i16
+; FOLDING-OPT:        call <8 x i8> @llvm.masked.load.v8i8.p0v8i8
+; FOLDING-OPT:        call void @llvm.masked.store.v8i8.p0v8i8
 ; FOLDING-OPT:        br i1 %{{.*}}, label %{{.*}}, label %vector.body
 entry:
  br label %for.body
--- a/test/Transforms/LoopVectorize/debugloc.ll
+++ b/test/Transforms/LoopVectorize/debugloc.ll
@ -17,7 +17,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 ; CHECK:   add <2 x i32> %{{.*}}, %rdx.shuf, !dbg ![[BR_LOC:[0-9]+]]
 ; CHECK:   extractelement <2 x i32> %bin.rdx, i32 0, !dbg ![[BR_LOC]]
 ; CHECK: for.body
-; CHECK br i1{{.*}}, label %for.body,{{.*}}, !dbg ![[BR_LOC]],
+; CHECK: br i1{{.*}}, label %for.body,{{.*}}, !dbg ![[BR_LOC]],
 ; CHECK: ![[BR_LOC]] = !DILocation(line: 5,

 define i32 @f(i32* nocapture %a, i32 %size) #0 !dbg !4 {
--- a/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll
+++ b/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll
@ -129,7 +129,7 @@ define void @cannot_sink_with_additional_user(i32 %x, i32* %ptr, i64 %tc) {
 ; CHECK:  br label %for

 ; CHECK-LABEL: for:                                              ; preds = %for, %preheader
-; CHECK  br i1 %exitcond, label %exit, label %for
+; CHECK:  br i1 %exitcond, label %exit, label %for

 ; CHECK-LABEL: exit:
 ; CHECK-NEXT:    ret void
@ -172,7 +172,7 @@ define void @cannot_sink_store(i32 %x, i32* %ptr, i64 %tc) {
 ; CHECK:  br label %for

 ; CHECK-LABEL: for:                                              ; preds = %for, %preheader
-; CHECK  br i1 %exitcond, label %exit, label %for
+; CHECK:  br i1 %exitcond, label %exit, label %for

 ; CHECK-LABEL: exit:
 ; CHECK-NEXT:    ret void
--- a/test/Transforms/LoopVectorize/followup.ll
+++ b/test/Transforms/LoopVectorize/followup.ll
@ -29,7 +29,7 @@ for.end:
 !5 = !{!"llvm.loop.vectorize.followup_all", !{!"FollowupAll"}}


-; CHECK-LABEL @followup(
+; CHECK-LABEL: @followup(

 ; CHECK-LABEL: vector.body:
 ; CHECK: br i1 %13, label %middle.block, label %vector.body, !llvm.loop ![[LOOP_VECTOR:[0-9]+]]
--- a/test/Transforms/PGOProfile/icp_invoke.ll
+++ b/test/Transforms/PGOProfile/icp_invoke.ll
@ -101,6 +101,6 @@ declare void @__cxa_end_catch()
 !1 = !{!"invoke.ll:_ZL4bar2v"}
 !2 = !{!"VP", i32 0, i64 1, i64 -2732222848796217051, i64 1}
 !3 = !{!"VP", i32 0, i64 1, i64 -6116256810522035449, i64 1}
-; ICP-NOT !3 = !{!"VP", i32 0, i64 1, i64 -2732222848796217051, i64 1}
-; ICP-NOT !4 = !{!"VP", i32 0, i64 1, i64 -6116256810522035449, i64 1}
+; ICP-NOT: !3 = !{!"VP", i32 0, i64 1, i64 -2732222848796217051, i64 1}
+; ICP-NOT: !4 = !{!"VP", i32 0, i64 1, i64 -6116256810522035449, i64 1}
 ; ICP: [[BRANCH_WEIGHT]] = !{!"branch_weights", i32 1, i32 0}
--- a/test/Transforms/PGOProfile/memop_clone.ll
+++ b/test/Transforms/PGOProfile/memop_clone.ll
@ -10,7 +10,7 @@ define i32 @test(i8* %a, i8* %b) !prof !1 {
 ; CHECK: tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* undef, i8* %a, i32 undef, i1 false)
 ; CHECK: MemOP.Case.33:
 ; CHECK: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* undef, i8* %b, i64 3, i1 false)
-; CHECK  MemOP.Case.24:
+; CHECK: MemOP.Case.24:
 ; CHECK: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* undef, i8* %b, i64 2, i1 false)
 ; CHECK: MemOP.Default2:
 ; CHECK: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* undef, i8* %b, i64 undef, i1 false)
--- a/test/Transforms/SimplifyCFG/Hexagon/switch-to-lookup-table.ll
+++ b/test/Transforms/SimplifyCFG/Hexagon/switch-to-lookup-table.ll
@ -5,7 +5,7 @@

 ; ENABLE: @{{.*}} = private unnamed_addr constant [6 x i32] [i32 9, i32 20, i32 14, i32 22, i32 12, i32 5]
 ; DISABLE-NOT: @{{.*}} = private unnamed_addr constant [6 x i32] [i32 9, i32 20, i32 14, i32 22, i32 12, i32 5]
-; DISABLE : = phi i32 [ 19, %{{.*}} ], [ 5, %{{.*}} ], [ 12, %{{.*}} ], [ 22, %{{.*}} ], [ 14, %{{.*}} ], [ 20, %{{.*}} ], [ 9, %{{.*}} ]
+; DISABLE: = phi i32 [ 19, %{{.*}} ], [ 5, %{{.*}} ], [ 12, %{{.*}} ], [ 22, %{{.*}} ], [ 14, %{{.*}} ], [ 20, %{{.*}} ], [ 9, %{{.*}} ]

 target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048"
 target triple = "hexagon-unknown--elf"
--- a/test/Transforms/SimplifyCFG/pr33605.ll
+++ b/test/Transforms/SimplifyCFG/pr33605.ll
@ -27,7 +27,7 @@
 ; CHECK: br label %if.end
 ; CHECK-NOT: br label %for.cond
 ; CHECK: if.end:
-; CHECK br label %for.cond
+; CHECK: br label %for.cond
 define i1 @test(i32 %a, i32 %b, i32* %c) {
 entry:
  br label %for.cond
--- a/test/tools/llvm-dwarfdump/X86/statistics.ll
+++ b/test/tools/llvm-dwarfdump/X86/statistics.ll
@ -44,7 +44,7 @@
 ; Because of the dbg.value in the middle of the function, the pc range coverage
 ; must be below 100%.
 ; CHECK-NOT: "scope bytes covered":0
-; CHECK-NOT "scope bytes covered":[[BYTES]]
+; CHECK-NOT: "scope bytes covered":[[BYTES]]
 ; CHECK: "scope bytes covered":
 ; CHECK: "total function size":[[FUNCSIZE:[0-9]+]]
 ; CHECK: "total inlined function size":[[INLINESIZE:[0-9]+]]
--- a/test/tools/llvm-profdata/value-prof.proftext
+++ b/test/tools/llvm-profdata/value-prof.proftext
@ -77,5 +77,5 @@ bar
 #ICSUM: Total number of sites with values: 2
 #ICSUM: Total number of profiled values: 3
 #ICSUM:	NumTargets, SiteCount
-#ICSUM	  1, 1
-#ICSUM	  2, 1
+#ICSUM:	  1, 1
+#ICSUM:	  2, 1
--- a/test/tools/llvm-symbolizer/sym.test
+++ b/test/tools/llvm-symbolizer/sym.test
@ -56,7 +56,7 @@ RUN: llvm-addr2line -pafi -obj=%p/Inputs/addr.exe < %p/Inputs/addr.inp | FileChe
 #PRETTY: some text
 #PRETTY: {{[0x]+}}40054d: inctwo at {{[/\]+}}tmp{{[/\]+}}x.c:3:3
 #PRETTY:  (inlined by) inc at {{[/\]+}}tmp{{[/\]+}}x.c:7:0
-#PRETTY   (inlined by) main at {{[/\]+}}tmp{{[/\]+}}x.c:14:0
+#PRETTY:  (inlined by) main at {{[/\]+}}tmp{{[/\]+}}x.c:14:0
 #PRETTY: some text2
 #
 #ZERO: ??