1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-02-01 13:11:39 +01:00
Pierre-vh 39a7b5b535 [LSR][ARM] Add new TTI hook to mark some LSR chains as profitable
This patch adds a new TTI hook to allow targets to tell LSR that
a chain including some instruction is already profitable and
should not be optimized. This patch also adds an implementation
of this TTI hook for ARM so LSR doesn't optimize chains that include
the VCTP intrinsic.

Differential Revision: https://reviews.llvm.org/D79418
2020-05-13 14:18:28 +01:00

258 lines
16 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -mtriple=thumbv8.1m.main -mattr=+mve %s -S -loop-reduce -o - | FileCheck %s
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "thumbv8.1m-arm-none-eabi"
define float @vctp8(float* %0, i32 %1) {
; CHECK-LABEL: @vctp8(
; CHECK-NEXT: [[TMP3:%.*]] = tail call { <4 x i32>, i32 } @llvm.arm.mve.vidup.v4i32(i32 0, i32 8)
; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP3]], 0
; CHECK-NEXT: [[TMP5:%.*]] = add nsw i32 [[TMP1:%.*]], -1
; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint float* [[TMP0:%.*]] to i32
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> undef, i32 [[TMP6]], i32 0
; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i32> [[TMP7]], <i32 -32, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP10:%.*]] = add <4 x i32> [[TMP4]], [[TMP9]]
; CHECK-NEXT: br label [[TMP11:%.*]]
; CHECK: 11:
; CHECK-NEXT: [[TMP12:%.*]] = phi i32 [ [[TMP5]], [[TMP2:%.*]] ], [ [[TMP21:%.*]], [[TMP11]] ]
; CHECK-NEXT: [[TMP13:%.*]] = phi <4 x float> [ zeroinitializer, [[TMP2]] ], [ [[TMP19:%.*]], [[TMP11]] ]
; CHECK-NEXT: [[TMP14:%.*]] = phi <4 x i32> [ [[TMP10]], [[TMP2]] ], [ [[TMP17:%.*]], [[TMP11]] ]
; CHECK-NEXT: [[TMP15:%.*]] = tail call <16 x i1> @llvm.arm.mve.vctp8(i32 [[TMP12]])
; CHECK-NEXT: [[MASK:%.*]] = tail call <4 x i1> @v16i1_to_v4i1(<16 x i1> [[TMP15]])
; CHECK-NEXT: [[TMP16:%.*]] = tail call { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4f32.v4i32.v4i1(<4 x i32> [[TMP14]], i32 32, <4 x i1> [[MASK]])
; CHECK-NEXT: [[TMP17]] = extractvalue { <4 x float>, <4 x i32> } [[TMP16]], 1
; CHECK-NEXT: [[TMP18:%.*]] = extractvalue { <4 x float>, <4 x i32> } [[TMP16]], 0
; CHECK-NEXT: [[TMP19]] = tail call <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float> [[TMP13]], <4 x float> [[TMP18]], <4 x i1> [[MASK]], <4 x float> [[TMP13]])
; CHECK-NEXT: [[TMP20:%.*]] = icmp sgt i32 [[TMP12]], 4
; CHECK-NEXT: [[TMP21]] = add i32 [[TMP12]], -4
; CHECK-NEXT: br i1 [[TMP20]], label [[TMP11]], label [[TMP22:%.*]]
; CHECK: 22:
; CHECK-NEXT: [[TMP23:%.*]] = tail call i32 bitcast (i32 (...)* @vecAddAcrossF32Mve to i32 (<4 x float>)*)(<4 x float> [[TMP19]])
; CHECK-NEXT: [[TMP24:%.*]] = sitofp i32 [[TMP23]] to float
; CHECK-NEXT: [[TMP25:%.*]] = tail call float @llvm.fabs.f32(float [[TMP24]])
; CHECK-NEXT: ret float [[TMP25]]
;
%3 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vidup.v4i32(i32 0, i32 8)
%4 = extractvalue { <4 x i32>, i32 } %3, 0
%5 = add nsw i32 %1, -1
%6 = ptrtoint float* %0 to i32
%7 = insertelement <4 x i32> undef, i32 %6, i32 0
%8 = add <4 x i32> %7, <i32 -32, i32 undef, i32 undef, i32 undef>
%9 = shufflevector <4 x i32> %8, <4 x i32> undef, <4 x i32> zeroinitializer
%10 = add <4 x i32> %4, %9
br label %11
11: ; preds = %11, %2
%12 = phi i32 [ %5, %2 ], [ %20, %11 ]
%13 = phi <4 x float> [ zeroinitializer, %2 ], [ %19, %11 ]
%14 = phi <4 x i32> [ %10, %2 ], [ %17, %11 ]
%15 = tail call <16 x i1> @llvm.arm.mve.vctp8(i32 %12)
%mask = tail call <4 x i1> @v16i1_to_v4i1(<16 x i1> %15)
%16 = tail call { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4f32.v4i32.v4i1(<4 x i32> %14, i32 32, <4 x i1> %mask)
%17 = extractvalue { <4 x float>, <4 x i32> } %16, 1
%18 = extractvalue { <4 x float>, <4 x i32> } %16, 0
%19 = tail call <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float> %13, <4 x float> %18, <4 x i1> %mask, <4 x float> %13)
%20 = add nsw i32 %12, -4
%21 = icmp sgt i32 %12, 4
br i1 %21, label %11, label %22
22: ; preds = %11
%23 = tail call i32 bitcast (i32 (...)* @vecAddAcrossF32Mve to i32 (<4 x float>)*)(<4 x float> %19)
%24 = sitofp i32 %23 to float
%25 = tail call float @llvm.fabs.f32(float %24)
ret float %25
}
define float @vctp16(float* %0, i32 %1) {
; CHECK-LABEL: @vctp16(
; CHECK-NEXT: [[TMP3:%.*]] = tail call { <4 x i32>, i32 } @llvm.arm.mve.vidup.v4i32(i32 0, i32 8)
; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP3]], 0
; CHECK-NEXT: [[TMP5:%.*]] = add nsw i32 [[TMP1:%.*]], -1
; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint float* [[TMP0:%.*]] to i32
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> undef, i32 [[TMP6]], i32 0
; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i32> [[TMP7]], <i32 -32, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP10:%.*]] = add <4 x i32> [[TMP4]], [[TMP9]]
; CHECK-NEXT: br label [[TMP11:%.*]]
; CHECK: 11:
; CHECK-NEXT: [[TMP12:%.*]] = phi i32 [ [[TMP5]], [[TMP2:%.*]] ], [ [[TMP21:%.*]], [[TMP11]] ]
; CHECK-NEXT: [[TMP13:%.*]] = phi <4 x float> [ zeroinitializer, [[TMP2]] ], [ [[TMP19:%.*]], [[TMP11]] ]
; CHECK-NEXT: [[TMP14:%.*]] = phi <4 x i32> [ [[TMP10]], [[TMP2]] ], [ [[TMP17:%.*]], [[TMP11]] ]
; CHECK-NEXT: [[TMP15:%.*]] = tail call <8 x i1> @llvm.arm.mve.vctp16(i32 [[TMP12]])
; CHECK-NEXT: [[MASK:%.*]] = tail call <4 x i1> @v8i1_to_v4i1(<8 x i1> [[TMP15]])
; CHECK-NEXT: [[TMP16:%.*]] = tail call { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4f32.v4i32.v4i1(<4 x i32> [[TMP14]], i32 32, <4 x i1> [[MASK]])
; CHECK-NEXT: [[TMP17]] = extractvalue { <4 x float>, <4 x i32> } [[TMP16]], 1
; CHECK-NEXT: [[TMP18:%.*]] = extractvalue { <4 x float>, <4 x i32> } [[TMP16]], 0
; CHECK-NEXT: [[TMP19]] = tail call <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float> [[TMP13]], <4 x float> [[TMP18]], <4 x i1> [[MASK]], <4 x float> [[TMP13]])
; CHECK-NEXT: [[TMP20:%.*]] = icmp sgt i32 [[TMP12]], 4
; CHECK-NEXT: [[TMP21]] = add i32 [[TMP12]], -4
; CHECK-NEXT: br i1 [[TMP20]], label [[TMP11]], label [[TMP22:%.*]]
; CHECK: 22:
; CHECK-NEXT: [[TMP23:%.*]] = tail call i32 bitcast (i32 (...)* @vecAddAcrossF32Mve to i32 (<4 x float>)*)(<4 x float> [[TMP19]])
; CHECK-NEXT: [[TMP24:%.*]] = sitofp i32 [[TMP23]] to float
; CHECK-NEXT: [[TMP25:%.*]] = tail call float @llvm.fabs.f32(float [[TMP24]])
; CHECK-NEXT: ret float [[TMP25]]
;
%3 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vidup.v4i32(i32 0, i32 8)
%4 = extractvalue { <4 x i32>, i32 } %3, 0
%5 = add nsw i32 %1, -1
%6 = ptrtoint float* %0 to i32
%7 = insertelement <4 x i32> undef, i32 %6, i32 0
%8 = add <4 x i32> %7, <i32 -32, i32 undef, i32 undef, i32 undef>
%9 = shufflevector <4 x i32> %8, <4 x i32> undef, <4 x i32> zeroinitializer
%10 = add <4 x i32> %4, %9
br label %11
11: ; preds = %11, %2
%12 = phi i32 [ %5, %2 ], [ %20, %11 ]
%13 = phi <4 x float> [ zeroinitializer, %2 ], [ %19, %11 ]
%14 = phi <4 x i32> [ %10, %2 ], [ %17, %11 ]
%15 = tail call <8 x i1> @llvm.arm.mve.vctp16(i32 %12)
%mask = tail call <4 x i1> @v8i1_to_v4i1(<8 x i1> %15)
%16 = tail call { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4f32.v4i32.v4i1(<4 x i32> %14, i32 32, <4 x i1> %mask)
%17 = extractvalue { <4 x float>, <4 x i32> } %16, 1
%18 = extractvalue { <4 x float>, <4 x i32> } %16, 0
%19 = tail call <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float> %13, <4 x float> %18, <4 x i1> %mask, <4 x float> %13)
%20 = add nsw i32 %12, -4
%21 = icmp sgt i32 %12, 4
br i1 %21, label %11, label %22
22: ; preds = %11
%23 = tail call i32 bitcast (i32 (...)* @vecAddAcrossF32Mve to i32 (<4 x float>)*)(<4 x float> %19)
%24 = sitofp i32 %23 to float
%25 = tail call float @llvm.fabs.f32(float %24)
ret float %25
}
define float @vctpi32(float* %0, i32 %1) {
; CHECK-LABEL: @vctpi32(
; CHECK-NEXT: [[TMP3:%.*]] = tail call { <4 x i32>, i32 } @llvm.arm.mve.vidup.v4i32(i32 0, i32 8)
; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP3]], 0
; CHECK-NEXT: [[TMP5:%.*]] = add nsw i32 [[TMP1:%.*]], -1
; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint float* [[TMP0:%.*]] to i32
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> undef, i32 [[TMP6]], i32 0
; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i32> [[TMP7]], <i32 -32, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP10:%.*]] = add <4 x i32> [[TMP4]], [[TMP9]]
; CHECK-NEXT: br label [[TMP11:%.*]]
; CHECK: 11:
; CHECK-NEXT: [[TMP12:%.*]] = phi i32 [ [[TMP5]], [[TMP2:%.*]] ], [ [[TMP21:%.*]], [[TMP11]] ]
; CHECK-NEXT: [[TMP13:%.*]] = phi <4 x float> [ zeroinitializer, [[TMP2]] ], [ [[TMP19:%.*]], [[TMP11]] ]
; CHECK-NEXT: [[TMP14:%.*]] = phi <4 x i32> [ [[TMP10]], [[TMP2]] ], [ [[TMP17:%.*]], [[TMP11]] ]
; CHECK-NEXT: [[TMP15:%.*]] = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 [[TMP12]])
; CHECK-NEXT: [[TMP16:%.*]] = tail call { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4f32.v4i32.v4i1(<4 x i32> [[TMP14]], i32 32, <4 x i1> [[TMP15]])
; CHECK-NEXT: [[TMP17]] = extractvalue { <4 x float>, <4 x i32> } [[TMP16]], 1
; CHECK-NEXT: [[TMP18:%.*]] = extractvalue { <4 x float>, <4 x i32> } [[TMP16]], 0
; CHECK-NEXT: [[TMP19]] = tail call <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float> [[TMP13]], <4 x float> [[TMP18]], <4 x i1> [[TMP15]], <4 x float> [[TMP13]])
; CHECK-NEXT: [[TMP20:%.*]] = icmp sgt i32 [[TMP12]], 4
; CHECK-NEXT: [[TMP21]] = add i32 [[TMP12]], -4
; CHECK-NEXT: br i1 [[TMP20]], label [[TMP11]], label [[TMP22:%.*]]
; CHECK: 22:
; CHECK-NEXT: [[TMP23:%.*]] = tail call i32 bitcast (i32 (...)* @vecAddAcrossF32Mve to i32 (<4 x float>)*)(<4 x float> [[TMP19]])
; CHECK-NEXT: [[TMP24:%.*]] = sitofp i32 [[TMP23]] to float
; CHECK-NEXT: [[TMP25:%.*]] = tail call float @llvm.fabs.f32(float [[TMP24]])
; CHECK-NEXT: ret float [[TMP25]]
;
%3 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vidup.v4i32(i32 0, i32 8)
%4 = extractvalue { <4 x i32>, i32 } %3, 0
%5 = add nsw i32 %1, -1
%6 = ptrtoint float* %0 to i32
%7 = insertelement <4 x i32> undef, i32 %6, i32 0
%8 = add <4 x i32> %7, <i32 -32, i32 undef, i32 undef, i32 undef>
%9 = shufflevector <4 x i32> %8, <4 x i32> undef, <4 x i32> zeroinitializer
%10 = add <4 x i32> %4, %9
br label %11
11: ; preds = %11, %2
%12 = phi i32 [ %5, %2 ], [ %20, %11 ]
%13 = phi <4 x float> [ zeroinitializer, %2 ], [ %19, %11 ]
%14 = phi <4 x i32> [ %10, %2 ], [ %17, %11 ]
%15 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %12)
%16 = tail call { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4f32.v4i32.v4i1(<4 x i32> %14, i32 32, <4 x i1> %15)
%17 = extractvalue { <4 x float>, <4 x i32> } %16, 1
%18 = extractvalue { <4 x float>, <4 x i32> } %16, 0
%19 = tail call <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float> %13, <4 x float> %18, <4 x i1> %15, <4 x float> %13)
%20 = add nsw i32 %12, -4
%21 = icmp sgt i32 %12, 4
br i1 %21, label %11, label %22
22: ; preds = %11
%23 = tail call i32 bitcast (i32 (...)* @vecAddAcrossF32Mve to i32 (<4 x float>)*)(<4 x float> %19)
%24 = sitofp i32 %23 to float
%25 = tail call float @llvm.fabs.f32(float %24)
ret float %25
}
define float @vctpi64(float* %0, i32 %1) {
; CHECK-LABEL: @vctpi64(
; CHECK-NEXT: [[TMP3:%.*]] = tail call { <4 x i32>, i32 } @llvm.arm.mve.vidup.v4i32(i32 0, i32 8)
; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP3]], 0
; CHECK-NEXT: [[TMP5:%.*]] = add nsw i32 [[TMP1:%.*]], -1
; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint float* [[TMP0:%.*]] to i32
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> undef, i32 [[TMP6]], i32 0
; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i32> [[TMP7]], <i32 -32, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP10:%.*]] = add <4 x i32> [[TMP4]], [[TMP9]]
; CHECK-NEXT: br label [[TMP11:%.*]]
; CHECK: 11:
; CHECK-NEXT: [[TMP12:%.*]] = phi i32 [ [[TMP5]], [[TMP2:%.*]] ], [ [[TMP21:%.*]], [[TMP11]] ]
; CHECK-NEXT: [[TMP13:%.*]] = phi <4 x float> [ zeroinitializer, [[TMP2]] ], [ [[TMP19:%.*]], [[TMP11]] ]
; CHECK-NEXT: [[TMP14:%.*]] = phi <4 x i32> [ [[TMP10]], [[TMP2]] ], [ [[TMP17:%.*]], [[TMP11]] ]
; CHECK-NEXT: [[TMP15:%.*]] = tail call <4 x i1> @llvm.arm.mve.vctp64(i32 [[TMP12]])
; CHECK-NEXT: [[TMP16:%.*]] = tail call { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4f32.v4i32.v4i1(<4 x i32> [[TMP14]], i32 32, <4 x i1> [[TMP15]])
; CHECK-NEXT: [[TMP17]] = extractvalue { <4 x float>, <4 x i32> } [[TMP16]], 1
; CHECK-NEXT: [[TMP18:%.*]] = extractvalue { <4 x float>, <4 x i32> } [[TMP16]], 0
; CHECK-NEXT: [[TMP19]] = tail call <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float> [[TMP13]], <4 x float> [[TMP18]], <4 x i1> [[TMP15]], <4 x float> [[TMP13]])
; CHECK-NEXT: [[TMP20:%.*]] = icmp sgt i32 [[TMP12]], 4
; CHECK-NEXT: [[TMP21]] = add i32 [[TMP12]], -4
; CHECK-NEXT: br i1 [[TMP20]], label [[TMP11]], label [[TMP22:%.*]]
; CHECK: 22:
; CHECK-NEXT: [[TMP23:%.*]] = tail call i32 bitcast (i32 (...)* @vecAddAcrossF32Mve to i32 (<4 x float>)*)(<4 x float> [[TMP19]])
; CHECK-NEXT: [[TMP24:%.*]] = sitofp i32 [[TMP23]] to float
; CHECK-NEXT: [[TMP25:%.*]] = tail call float @llvm.fabs.f32(float [[TMP24]])
; CHECK-NEXT: ret float [[TMP25]]
;
%3 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vidup.v4i32(i32 0, i32 8)
%4 = extractvalue { <4 x i32>, i32 } %3, 0
%5 = add nsw i32 %1, -1
%6 = ptrtoint float* %0 to i32
%7 = insertelement <4 x i32> undef, i32 %6, i32 0
%8 = add <4 x i32> %7, <i32 -32, i32 undef, i32 undef, i32 undef>
%9 = shufflevector <4 x i32> %8, <4 x i32> undef, <4 x i32> zeroinitializer
%10 = add <4 x i32> %4, %9
br label %11
11: ; preds = %11, %2
%12 = phi i32 [ %5, %2 ], [ %20, %11 ]
%13 = phi <4 x float> [ zeroinitializer, %2 ], [ %19, %11 ]
%14 = phi <4 x i32> [ %10, %2 ], [ %17, %11 ]
%15 = tail call <4 x i1> @llvm.arm.mve.vctp64(i32 %12)
%16 = tail call { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4f32.v4i32.v4i1(<4 x i32> %14, i32 32, <4 x i1> %15)
%17 = extractvalue { <4 x float>, <4 x i32> } %16, 1
%18 = extractvalue { <4 x float>, <4 x i32> } %16, 0
%19 = tail call <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float> %13, <4 x float> %18, <4 x i1> %15, <4 x float> %13)
%20 = add nsw i32 %12, -4
%21 = icmp sgt i32 %12, 4
br i1 %21, label %11, label %22
22: ; preds = %11
%23 = tail call i32 bitcast (i32 (...)* @vecAddAcrossF32Mve to i32 (<4 x float>)*)(<4 x float> %19)
%24 = sitofp i32 %23 to float
%25 = tail call float @llvm.fabs.f32(float %24)
ret float %25
}
declare { <4 x i32>, i32 } @llvm.arm.mve.vidup.v4i32(i32, i32)
declare <16 x i1> @llvm.arm.mve.vctp8(i32)
declare <8 x i1> @llvm.arm.mve.vctp16(i32)
declare <4 x i1> @llvm.arm.mve.vctp32(i32)
declare <4 x i1> @llvm.arm.mve.vctp64(i32)
declare { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4f32.v4i32.v4i1(<4 x i32>, i32, <4 x i1>)
declare <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float>, <4 x float>, <4 x i1>, <4 x float>)
declare i32 @vecAddAcrossF32Mve(...)
declare <4 x i1> @v8i1_to_v4i1(<8 x i1>)
declare <4 x i1> @v16i1_to_v4i1(<16 x i1>)
declare float @llvm.fabs.f32(float)