diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index bc6f852a018..8a652c1d90f 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -708,6 +708,7 @@ def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0, let Inst{22} = Dd{4}; let Predicates = [HasVFP2, HasDPVFP]; + let hasSideEffects = 0; } // Special case encoding: bits 11-8 is 0b1011. @@ -732,9 +733,11 @@ def VCVTSD : VFPAI<(outs SPR:$Sd), (ins DPR:$Dm), VFPUnaryFrm, let Inst{4} = 0; let Predicates = [HasVFP2, HasDPVFP]; + let hasSideEffects = 0; } // Between half, single and double-precision. +let hasSideEffects = 0 in def VCVTBHS: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), /* FIXME */ IIC_fpCVTSH, "vcvtb", ".f32.f16\t$Sd, $Sm", [/* Intentionally left blank, see patterns below */]>, @@ -746,6 +749,7 @@ def : FP16Pat<(f32 (fpextend (f16 HPR:$Sm))), def : FP16Pat<(f16_to_fp GPR:$a), (VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>; +let hasSideEffects = 0 in def VCVTBSH: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), /* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$Sd, $Sm", [/* Intentionally left blank, see patterns below */]>, @@ -763,6 +767,7 @@ def : FP16Pat<(insertelt (v4f16 DPR:$src1), (f16 (fpround (f32 SPR:$src2))), imm (v4f16 (INSERT_SUBREG (v4f16 DPR:$src1), (VCVTBSH SPR:$src2), (SSubReg_f16_reg imm:$lane)))>; +let hasSideEffects = 0 in def VCVTTHS: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), /* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$Sd, $Sm", [/* Intentionally left blank, see patterns below */]>, @@ -776,6 +781,7 @@ def : FP16Pat<(f32 (fpextend (extractelt (v4f16 DPR:$src), imm_odd:$lane))), (v2f32 (COPY_TO_REGCLASS (v4f16 DPR:$src), DPR_VFP2)), (SSubReg_f16_reg imm_odd:$lane)))>; +let hasSideEffects = 0 in def VCVTTSH: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), /* FIXME */ IIC_fpCVTHS, "vcvtt", ".f16.f32\t$Sd, $Sm", [/* Intentionally left blank, see patterns below 
*/]>, @@ -801,6 +807,8 @@ def VCVTBHD : ADuI<0b11101, 0b11, 0b0010, 0b01, 0, // Encode instruction operands. let Inst{3-0} = Sm{4-1}; let Inst{5} = Sm{0}; + + let hasSideEffects = 0; } def : FullFP16Pat<(f64 (fpextend (f16 HPR:$Sm))), @@ -824,6 +832,8 @@ def VCVTBDH : ADuI<0b11101, 0b11, 0b0011, 0b01, 0, let Inst{5} = Dm{4}; let Inst{15-12} = Sd{4-1}; let Inst{22} = Sd{0}; + + let hasSideEffects = 0; } def : FullFP16Pat<(f16 (fpround DPR:$Dm)), @@ -843,6 +853,8 @@ def VCVTTHD : ADuI<0b11101, 0b11, 0b0010, 0b11, 0, // Encode instruction operands. let Inst{3-0} = Sm{4-1}; let Inst{5} = Sm{0}; + + let hasSideEffects = 0; } def VCVTTDH : ADuI<0b11101, 0b11, 0b0011, 0b11, 0, @@ -858,11 +870,13 @@ def VCVTTDH : ADuI<0b11101, 0b11, 0b0011, 0b11, 0, let Inst{22} = Sd{0}; let Inst{3-0} = Dm{3-0}; let Inst{5} = Dm{4}; + + let hasSideEffects = 0; } multiclass vcvt_inst rm, SDPatternOperator node = null_frag> { - let PostEncoderMethod = "", DecoderNamespace = "VFPV8" in { + let PostEncoderMethod = "", DecoderNamespace = "VFPV8", hasSideEffects = 0 in { def SH : AHuInp<0b11101, 0b11, 0b1100, 0b11, 0, (outs SPR:$Sd), (ins HPR:$Sm), NoItinerary, !strconcat("vcvt", opc, ".s32.f16\t$Sd, $Sm"), @@ -1367,6 +1381,7 @@ class AVConv1IDs_Encode opcod1, bits<2> opcod2, bits<4> opcod3, let Inst{22} = Dd{4}; let Predicates = [HasVFP2, HasDPVFP]; + let hasSideEffects = 0; } class AVConv1InSs_Encode opcod1, bits<2> opcod2, bits<4> opcod3, @@ -1383,6 +1398,8 @@ class AVConv1InSs_Encode opcod1, bits<2> opcod2, bits<4> opcod3, let Inst{5} = Sm{0}; let Inst{15-12} = Sd{4-1}; let Inst{22} = Sd{0}; + + let hasSideEffects = 0; } class AVConv1IHs_Encode opcod1, bits<2> opcod2, bits<4> opcod3, @@ -1402,6 +1419,7 @@ class AVConv1IHs_Encode opcod1, bits<2> opcod2, bits<4> opcod3, let Inst{22} = Sd{0}; let Predicates = [HasFullFP16]; + let hasSideEffects = 0; } def VSITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011, @@ -1515,6 +1533,7 @@ class AVConv1IsD_Encode opcod1, bits<2> opcod2, bits<4> 
opcod3, let Inst{22} = Sd{0}; let Predicates = [HasVFP2, HasDPVFP]; + let hasSideEffects = 0; } class AVConv1InsS_Encode opcod1, bits<2> opcod2, bits<4> opcod3, @@ -1551,6 +1570,7 @@ class AVConv1IsH_Encode opcod1, bits<2> opcod2, bits<4> opcod3, let Inst{22} = Sd{0}; let Predicates = [HasFullFP16]; + let hasSideEffects = 0; } // Always set Z bit in the instruction, i.e. "round towards zero" variants. @@ -1730,6 +1750,8 @@ class AVConv1XInsS_Encode op1, bits<2> op2, bits<4> op3, bits<4> op4, // if dp_operation then UInt(D:Vd) else UInt(Vd:D); let Inst{22} = dst{0}; let Inst{15-12} = dst{4-1}; + + let hasSideEffects = 0; } // Double Precision register @@ -1742,6 +1764,7 @@ class AVConv1XInsD_Encode op1, bits<2> op2, bits<4> op3, bits<4> op4, let Inst{22} = dst{4}; let Inst{15-12} = dst{3-0}; + let hasSideEffects = 0; let Predicates = [HasVFP2, HasDPVFP]; } @@ -1940,7 +1963,9 @@ class BF16_VCVT op7_6> let Inst{11-8} = 0b1001; let Inst{7-6} = op7_6; let Inst{4} = 0; + let DecoderNamespace = "VFPV8"; + let hasSideEffects = 0; } def BF16_VCVTB : BF16_VCVT<"vcvtb", 0b01>; diff --git a/test/CodeGen/ARM/cmov_fp16.ll b/test/CodeGen/ARM/cmov_fp16.ll index 2c368d1a41b..805955d3e83 100644 --- a/test/CodeGen/ARM/cmov_fp16.ll +++ b/test/CodeGen/ARM/cmov_fp16.ll @@ -1,14 +1,15 @@ -; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+fullfp16 %s -o - | FileCheck %s --check-prefixes CHECK-THUMB,CHECK -; RUN: llc -mtriple=armv8.2a-arm-none-eabi -mattr=+fullfp16 %s -o - | FileCheck %s --check-prefixes CHECK-ARM,CHECK +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+fullfp16 %s -o - | FileCheck %s --check-prefixes CHECK,CHECK-THUMB +; RUN: llc -mtriple=armv8.2a-arm-none-eabi -mattr=+fullfp16 %s -o - | FileCheck %s --check-prefixes CHECK,CHECK-ARM define i32 @test_ne(i32 %x, i32 %y, i32 %a, i32 %b) { ; CHECK-LABEL: test_ne: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov s2, r0 -; CHECK-NEXT: 
cmp r2, r3 ; CHECK-NEXT: vmov s0, r1 -; CHECK-NEXT: vcvt.f16.u32 s2, s2 +; CHECK-NEXT: cmp r2, r3 +; CHECK-NEXT: vmov s2, r0 ; CHECK-NEXT: vcvt.f16.u32 s0, s0 +; CHECK-NEXT: vcvt.f16.u32 s2, s2 ; CHECK-NEXT: vseleq.f16 s0, s0, s2 ; CHECK-NEXT: vmov.f16 r0, s0 ; CHECK-NEXT: bx lr @@ -25,11 +26,11 @@ entry: define i32 @test_eq(i32 %x, i32 %y, i32 %a, i32 %b) { ; CHECK-LABEL: test_eq: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov s2, r1 -; CHECK-NEXT: cmp r2, r3 ; CHECK-NEXT: vmov s0, r0 -; CHECK-NEXT: vcvt.f16.u32 s2, s2 +; CHECK-NEXT: cmp r2, r3 +; CHECK-NEXT: vmov s2, r1 ; CHECK-NEXT: vcvt.f16.u32 s0, s0 +; CHECK-NEXT: vcvt.f16.u32 s2, s2 ; CHECK-NEXT: vseleq.f16 s0, s0, s2 ; CHECK-NEXT: vmov.f16 r0, s0 ; CHECK-NEXT: bx lr @@ -46,11 +47,11 @@ entry: define i32 @test_gt(i32 %x, i32 %y, i32 %a, i32 %b) { ; CHECK-LABEL: test_gt: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov s2, r1 -; CHECK-NEXT: cmp r2, r3 ; CHECK-NEXT: vmov s0, r0 -; CHECK-NEXT: vcvt.f16.u32 s2, s2 +; CHECK-NEXT: cmp r2, r3 +; CHECK-NEXT: vmov s2, r1 ; CHECK-NEXT: vcvt.f16.u32 s0, s0 +; CHECK-NEXT: vcvt.f16.u32 s2, s2 ; CHECK-NEXT: vselgt.f16 s0, s0, s2 ; CHECK-NEXT: vmov.f16 r0, s0 ; CHECK-NEXT: bx lr @@ -67,11 +68,11 @@ entry: define i32 @test_ge(i32 %x, i32 %y, i32 %a, i32 %b) { ; CHECK-LABEL: test_ge: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov s2, r1 -; CHECK-NEXT: cmp r2, r3 ; CHECK-NEXT: vmov s0, r0 -; CHECK-NEXT: vcvt.f16.u32 s2, s2 +; CHECK-NEXT: cmp r2, r3 +; CHECK-NEXT: vmov s2, r1 ; CHECK-NEXT: vcvt.f16.u32 s0, s0 +; CHECK-NEXT: vcvt.f16.u32 s2, s2 ; CHECK-NEXT: vselge.f16 s0, s0, s2 ; CHECK-NEXT: vmov.f16 r0, s0 ; CHECK-NEXT: bx lr @@ -88,11 +89,11 @@ entry: define i32 @test_lt(i32 %x, i32 %y, i32 %a, i32 %b) { ; CHECK-LABEL: test_lt: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov s2, r0 -; CHECK-NEXT: cmp r2, r3 ; CHECK-NEXT: vmov s0, r1 -; CHECK-NEXT: vcvt.f16.u32 s2, s2 +; CHECK-NEXT: cmp r2, r3 +; CHECK-NEXT: vmov s2, r0 ; CHECK-NEXT: vcvt.f16.u32 s0, s0 +; CHECK-NEXT: 
vcvt.f16.u32 s2, s2 ; CHECK-NEXT: vselge.f16 s0, s0, s2 ; CHECK-NEXT: vmov.f16 r0, s0 ; CHECK-NEXT: bx lr @@ -109,11 +110,11 @@ entry: define i32 @test_le(i32 %x, i32 %y, i32 %a, i32 %b) { ; CHECK-LABEL: test_le: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov s2, r0 -; CHECK-NEXT: cmp r2, r3 ; CHECK-NEXT: vmov s0, r1 -; CHECK-NEXT: vcvt.f16.u32 s2, s2 +; CHECK-NEXT: cmp r2, r3 +; CHECK-NEXT: vmov s2, r0 ; CHECK-NEXT: vcvt.f16.u32 s0, s0 +; CHECK-NEXT: vcvt.f16.u32 s2, s2 ; CHECK-NEXT: vselgt.f16 s0, s0, s2 ; CHECK-NEXT: vmov.f16 r0, s0 ; CHECK-NEXT: bx lr @@ -130,11 +131,11 @@ entry: define i32 @test_hi(i32 %x, i32 %y, i32 %a, i32 %b) { ; CHECK-THUMB-LABEL: test_hi: ; CHECK-THUMB: @ %bb.0: @ %entry -; CHECK-THUMB-NEXT: vmov s2, r0 -; CHECK-THUMB-NEXT: cmp r2, r3 ; CHECK-THUMB-NEXT: vmov s0, r1 -; CHECK-THUMB-NEXT: vcvt.f16.u32 s2, s2 +; CHECK-THUMB-NEXT: cmp r2, r3 +; CHECK-THUMB-NEXT: vmov s2, r0 ; CHECK-THUMB-NEXT: vcvt.f16.u32 s0, s0 +; CHECK-THUMB-NEXT: vcvt.f16.u32 s2, s2 ; CHECK-THUMB-NEXT: it hi ; CHECK-THUMB-NEXT: vmovhi.f32 s0, s2 ; CHECK-THUMB-NEXT: vmov.f16 r0, s0 @@ -142,11 +143,11 @@ define i32 @test_hi(i32 %x, i32 %y, i32 %a, i32 %b) { ; ; CHECK-ARM-LABEL: test_hi: ; CHECK-ARM: @ %bb.0: @ %entry -; CHECK-ARM-NEXT: vmov s2, r0 -; CHECK-ARM-NEXT: cmp r2, r3 ; CHECK-ARM-NEXT: vmov s0, r1 -; CHECK-ARM-NEXT: vcvt.f16.u32 s2, s2 +; CHECK-ARM-NEXT: cmp r2, r3 +; CHECK-ARM-NEXT: vmov s2, r0 ; CHECK-ARM-NEXT: vcvt.f16.u32 s0, s0 +; CHECK-ARM-NEXT: vcvt.f16.u32 s2, s2 ; CHECK-ARM-NEXT: vmovhi.f32 s0, s2 ; CHECK-ARM-NEXT: vmov.f16 r0, s0 ; CHECK-ARM-NEXT: bx lr @@ -163,11 +164,11 @@ entry: define i32 @test_hs(i32 %x, i32 %y, i32 %a, i32 %b) { ; CHECK-THUMB-LABEL: test_hs: ; CHECK-THUMB: @ %bb.0: @ %entry -; CHECK-THUMB-NEXT: vmov s2, r0 -; CHECK-THUMB-NEXT: cmp r2, r3 ; CHECK-THUMB-NEXT: vmov s0, r1 -; CHECK-THUMB-NEXT: vcvt.f16.u32 s2, s2 +; CHECK-THUMB-NEXT: cmp r2, r3 +; CHECK-THUMB-NEXT: vmov s2, r0 ; CHECK-THUMB-NEXT: vcvt.f16.u32 s0, s0 +; 
CHECK-THUMB-NEXT: vcvt.f16.u32 s2, s2 ; CHECK-THUMB-NEXT: it hs ; CHECK-THUMB-NEXT: vmovhs.f32 s0, s2 ; CHECK-THUMB-NEXT: vmov.f16 r0, s0 @@ -175,11 +176,11 @@ define i32 @test_hs(i32 %x, i32 %y, i32 %a, i32 %b) { ; ; CHECK-ARM-LABEL: test_hs: ; CHECK-ARM: @ %bb.0: @ %entry -; CHECK-ARM-NEXT: vmov s2, r0 -; CHECK-ARM-NEXT: cmp r2, r3 ; CHECK-ARM-NEXT: vmov s0, r1 -; CHECK-ARM-NEXT: vcvt.f16.u32 s2, s2 +; CHECK-ARM-NEXT: cmp r2, r3 +; CHECK-ARM-NEXT: vmov s2, r0 ; CHECK-ARM-NEXT: vcvt.f16.u32 s0, s0 +; CHECK-ARM-NEXT: vcvt.f16.u32 s2, s2 ; CHECK-ARM-NEXT: vmovhs.f32 s0, s2 ; CHECK-ARM-NEXT: vmov.f16 r0, s0 ; CHECK-ARM-NEXT: bx lr @@ -196,11 +197,11 @@ entry: define i32 @test_lo(i32 %x, i32 %y, i32 %a, i32 %b) { ; CHECK-THUMB-LABEL: test_lo: ; CHECK-THUMB: @ %bb.0: @ %entry -; CHECK-THUMB-NEXT: vmov s2, r0 -; CHECK-THUMB-NEXT: cmp r2, r3 ; CHECK-THUMB-NEXT: vmov s0, r1 -; CHECK-THUMB-NEXT: vcvt.f16.u32 s2, s2 +; CHECK-THUMB-NEXT: cmp r2, r3 +; CHECK-THUMB-NEXT: vmov s2, r0 ; CHECK-THUMB-NEXT: vcvt.f16.u32 s0, s0 +; CHECK-THUMB-NEXT: vcvt.f16.u32 s2, s2 ; CHECK-THUMB-NEXT: it lo ; CHECK-THUMB-NEXT: vmovlo.f32 s0, s2 ; CHECK-THUMB-NEXT: vmov.f16 r0, s0 @@ -208,11 +209,11 @@ define i32 @test_lo(i32 %x, i32 %y, i32 %a, i32 %b) { ; ; CHECK-ARM-LABEL: test_lo: ; CHECK-ARM: @ %bb.0: @ %entry -; CHECK-ARM-NEXT: vmov s2, r0 -; CHECK-ARM-NEXT: cmp r2, r3 ; CHECK-ARM-NEXT: vmov s0, r1 -; CHECK-ARM-NEXT: vcvt.f16.u32 s2, s2 +; CHECK-ARM-NEXT: cmp r2, r3 +; CHECK-ARM-NEXT: vmov s2, r0 ; CHECK-ARM-NEXT: vcvt.f16.u32 s0, s0 +; CHECK-ARM-NEXT: vcvt.f16.u32 s2, s2 ; CHECK-ARM-NEXT: vmovlo.f32 s0, s2 ; CHECK-ARM-NEXT: vmov.f16 r0, s0 ; CHECK-ARM-NEXT: bx lr @@ -229,11 +230,11 @@ entry: define i32 @test_ls(i32 %x, i32 %y, i32 %a, i32 %b) { ; CHECK-THUMB-LABEL: test_ls: ; CHECK-THUMB: @ %bb.0: @ %entry -; CHECK-THUMB-NEXT: vmov s2, r0 -; CHECK-THUMB-NEXT: cmp r2, r3 ; CHECK-THUMB-NEXT: vmov s0, r1 -; CHECK-THUMB-NEXT: vcvt.f16.u32 s2, s2 +; CHECK-THUMB-NEXT: cmp r2, r3 +; 
CHECK-THUMB-NEXT: vmov s2, r0 ; CHECK-THUMB-NEXT: vcvt.f16.u32 s0, s0 +; CHECK-THUMB-NEXT: vcvt.f16.u32 s2, s2 ; CHECK-THUMB-NEXT: it ls ; CHECK-THUMB-NEXT: vmovls.f32 s0, s2 ; CHECK-THUMB-NEXT: vmov.f16 r0, s0 @@ -241,11 +242,11 @@ define i32 @test_ls(i32 %x, i32 %y, i32 %a, i32 %b) { ; ; CHECK-ARM-LABEL: test_ls: ; CHECK-ARM: @ %bb.0: @ %entry -; CHECK-ARM-NEXT: vmov s2, r0 -; CHECK-ARM-NEXT: cmp r2, r3 ; CHECK-ARM-NEXT: vmov s0, r1 -; CHECK-ARM-NEXT: vcvt.f16.u32 s2, s2 +; CHECK-ARM-NEXT: cmp r2, r3 +; CHECK-ARM-NEXT: vmov s2, r0 ; CHECK-ARM-NEXT: vcvt.f16.u32 s0, s0 +; CHECK-ARM-NEXT: vcvt.f16.u32 s2, s2 ; CHECK-ARM-NEXT: vmovls.f32 s0, s2 ; CHECK-ARM-NEXT: vmov.f16 r0, s0 ; CHECK-ARM-NEXT: bx lr diff --git a/test/CodeGen/ARM/fp16-args.ll b/test/CodeGen/ARM/fp16-args.ll index e858661d32f..7ed1e883eef 100644 --- a/test/CodeGen/ARM/fp16-args.ll +++ b/test/CodeGen/ARM/fp16-args.ll @@ -11,10 +11,10 @@ define half @foo(half %a, half %b) { ; SOFT-LABEL: foo: ; SOFT: @ %bb.0: @ %entry -; SOFT-NEXT: vmov s2, r1 ; SOFT-NEXT: vmov s0, r0 -; SOFT-NEXT: vcvtb.f32.f16 s2, s2 +; SOFT-NEXT: vmov s2, r1 ; SOFT-NEXT: vcvtb.f32.f16 s0, s0 +; SOFT-NEXT: vcvtb.f32.f16 s2, s2 ; SOFT-NEXT: vadd.f32 s0, s0, s2 ; SOFT-NEXT: vcvtb.f16.f32 s0, s0 ; SOFT-NEXT: vmov r0, s0 diff --git a/test/CodeGen/ARM/fp16-bitcast.ll b/test/CodeGen/ARM/fp16-bitcast.ll index 4254f2ecef4..d26c2d96614 100644 --- a/test/CodeGen/ARM/fp16-bitcast.ll +++ b/test/CodeGen/ARM/fp16-bitcast.ll @@ -40,10 +40,10 @@ entry: define half @addf16(half %a, half %b) { ; CHECK-VFPV4-SOFT-LABEL: addf16: ; CHECK-VFPV4-SOFT: @ %bb.0: @ %entry -; CHECK-VFPV4-SOFT-NEXT: vmov s2, r1 ; CHECK-VFPV4-SOFT-NEXT: vmov s0, r0 -; CHECK-VFPV4-SOFT-NEXT: vcvtb.f32.f16 s2, s2 +; CHECK-VFPV4-SOFT-NEXT: vmov s2, r1 ; CHECK-VFPV4-SOFT-NEXT: vcvtb.f32.f16 s0, s0 +; CHECK-VFPV4-SOFT-NEXT: vcvtb.f32.f16 s2, s2 ; CHECK-VFPV4-SOFT-NEXT: vadd.f32 s0, s0, s2 ; CHECK-VFPV4-SOFT-NEXT: vcvtb.f16.f32 s0, s0 ; CHECK-VFPV4-SOFT-NEXT: vmov r0, s0 diff --git 
a/test/CodeGen/ARM/fp16-fullfp16.ll b/test/CodeGen/ARM/fp16-fullfp16.ll index e912cee3624..86a8caa76aa 100644 --- a/test/CodeGen/ARM/fp16-fullfp16.ll +++ b/test/CodeGen/ARM/fp16-fullfp16.ll @@ -61,8 +61,8 @@ define arm_aapcs_vfpcc void @test_frem(half* %p, half* %q) { ; CHECK-LABEL: test_frem: ; CHECK: .save {r4, lr} ; CHECK-NEXT: push {r4, lr} -; CHECK-NEXT: vldr.16 s2, [r1] ; CHECK-NEXT: vldr.16 s0, [r0] +; CHECK-NEXT: vldr.16 s2, [r1] ; CHECK-NEXT: mov r4, r0 ; CHECK-NEXT: vcvtb.f32.f16 s0, s0 ; CHECK-NEXT: vcvtb.f32.f16 s1, s2 @@ -285,8 +285,8 @@ define void @test_pow(half* %p, half* %q) { ; CHECK-LABEL: test_pow: ; CHECK: .save {r4, lr} ; CHECK-NEXT: push {r4, lr} -; CHECK-NEXT: vldr.16 s2, [r1] ; CHECK-NEXT: vldr.16 s0, [r0] +; CHECK-NEXT: vldr.16 s2, [r1] ; CHECK-NEXT: mov r4, r0 ; CHECK-NEXT: vcvtb.f32.f16 s0, s0 ; CHECK-NEXT: vcvtb.f32.f16 s1, s2 diff --git a/test/CodeGen/ARM/fp16-instructions.ll b/test/CodeGen/ARM/fp16-instructions.ll index 260dd12b3e2..5e3e4469fcc 100644 --- a/test/CodeGen/ARM/fp16-instructions.ll +++ b/test/CodeGen/ARM/fp16-instructions.ll @@ -72,10 +72,10 @@ entry: ; CHECK-SOFTFP-VFP3: vadd.f32 ; CHECK-SOFTFP-VFP3: bl __aeabi_f2h -; CHECK-SOFTFP-FP16: vmov [[S2:s[0-9]]], r1 -; CHECK-SOFTFP-FP16: vmov [[S0:s[0-9]]], r0 -; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S2]], [[S2]] -; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S0]], [[S0]] +; CHECK-SOFTFP-FP16-DAG: vmov [[S0:s[0-9]]], r0 +; CHECK-SOFTFP-FP16-DAG: vmov [[S2:s[0-9]]], r1 +; CHECK-SOFTFP-FP16-DAG: vcvtb.f32.f16 [[S0]], [[S0]] +; CHECK-SOFTFP-FP16-DAG: vcvtb.f32.f16 [[S2]], [[S2]] ; CHECK-SOFTFP-FP16: vadd.f32 [[S0]], [[S0]], [[S2]] ; CHECK-SOFTFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]] ; CHECK-SOFTFP-FP16: vmov r0, s0 @@ -355,10 +355,10 @@ entry: ; CHECK-SOFTFP-VFP3: vdiv.f32 ; CHECK-SOFTFP-VFP3: bl __aeabi_f2h -; CHECK-SOFTFP-FP16: vmov [[S2:s[0-9]]], r1 -; CHECK-SOFTFP-FP16: vmov [[S0:s[0-9]]], r0 -; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S2]], [[S2]] -; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S0]], 
[[S0]] +; CHECK-SOFTFP-FP16-DAG: vmov [[S0:s[0-9]]], r0 +; CHECK-SOFTFP-FP16-DAG: vmov [[S2:s[0-9]]], r1 +; CHECK-SOFTFP-FP16-DAG: vcvtb.f32.f16 [[S0]], [[S0]] +; CHECK-SOFTFP-FP16-DAG: vcvtb.f32.f16 [[S2]], [[S2]] ; CHECK-SOFTFP-FP16: vdiv.f32 [[S0]], [[S0]], [[S2]] ; CHECK-SOFTFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]] ; CHECK-SOFTFP-FP16: vmov r0, s0 @@ -577,10 +577,10 @@ entry: ; CHECK-SOFTFP-VFP3: vmul.f32 ; CHECK-SOFTFP-VFP3: bl __aeabi_f2h -; CHECK-SOFTFP-FP16: vmov [[S2:s[0-9]]], r1 -; CHECK-SOFTFP-FP16: vmov [[S0:s[0-9]]], r0 -; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S2]], [[S2]] -; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S0]], [[S0]] +; CHECK-SOFTFP-FP16-DAG: vmov [[S0:s[0-9]]], r0 +; CHECK-SOFTFP-FP16-DAG: vmov [[S2:s[0-9]]], r1 +; CHECK-SOFTFP-FP16-DAG: vcvtb.f32.f16 [[S0]], [[S0]] +; CHECK-SOFTFP-FP16-DAG: vcvtb.f32.f16 [[S2]], [[S2]] ; CHECK-SOFTFP-FP16: vmul.f32 [[S0]], [[S0]], [[S2]] ; CHECK-SOFTFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]] ; CHECK-SOFTFP-FP16: vmov r0, s0 @@ -973,10 +973,10 @@ entry: ; CHECK-SOFTFP-VFP3: vsub.f32 ; CHECK-SOFTFP-VFP3: bl __aeabi_f2h -; CHECK-SOFTFP-FP16: vmov [[S2:s[0-9]]], r1 -; CHECK-SOFTFP-FP16: vmov [[S0:s[0-9]]], r0 -; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S2]], [[S2]] -; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S0]], [[S0]] +; CHECK-SOFTFP-FP16-DAG: vmov [[S0:s[0-9]]], r0 +; CHECK-SOFTFP-FP16-DAG: vmov [[S2:s[0-9]]], r1 +; CHECK-SOFTFP-FP16-DAG: vcvtb.f32.f16 [[S0]], [[S0]] +; CHECK-SOFTFP-FP16-DAG: vcvtb.f32.f16 [[S2]], [[S2]] ; CHECK-SOFTFP-FP16: vsub.f32 [[S0]], [[S0]], [[S2]] ; CHECK-SOFTFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]] ; CHECK-SOFTFP-FP16: vmov r0, s0 diff --git a/test/CodeGen/ARM/no-fpscr-liveness.ll b/test/CodeGen/ARM/no-fpscr-liveness.ll index 6575ab6c55d..4fdb961e8e5 100644 --- a/test/CodeGen/ARM/no-fpscr-liveness.ll +++ b/test/CodeGen/ARM/no-fpscr-liveness.ll @@ -12,7 +12,9 @@ target triple = "thumbv7s-apple-ios" ; CHECK-LABEL: eggs: ; CHECK: sub sp, #8 ; VMRS instruction comes before any other instruction writing FPSCR: -; 
CHECK-NEXT: vmrs r0, fpscr +; CHECK-NOT: vcmp +; CHECK: vmrs {{r[0-9]}}, fpscr +; CHECK: vcmp ; ... ; CHECK: add sp, #8 ; CHECK: bx lr diff --git a/test/CodeGen/ARM/vcvt.ll b/test/CodeGen/ARM/vcvt.ll index f16c8dc3a15..8db0856fdb9 100644 --- a/test/CodeGen/ARM/vcvt.ll +++ b/test/CodeGen/ARM/vcvt.ll @@ -296,13 +296,13 @@ define <4 x i16> @fix_double_to_i16(<4 x double> %in) { ; CHECK-NEXT: vcvt.s32.f64 s0, d18 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: vadd.f64 d20, d16, d16 -; CHECK-NEXT: vadd.f64 d19, d19, d19 ; CHECK-NEXT: vadd.f64 d16, d17, d17 ; CHECK-NEXT: vcvt.s32.f64 s2, d20 -; CHECK-NEXT: vcvt.s32.f64 s4, d19 ; CHECK-NEXT: vcvt.s32.f64 s6, d16 ; CHECK-NEXT: vmov.32 d16[0], r0 ; CHECK-NEXT: vmov r0, s2 +; CHECK-NEXT: vadd.f64 d19, d19, d19 +; CHECK-NEXT: vcvt.s32.f64 s4, d19 ; CHECK-NEXT: vmov.32 d17[0], r0 ; CHECK-NEXT: vmov r0, s4 ; CHECK-NEXT: vmov.32 d16[1], r0 @@ -355,10 +355,8 @@ define i32 @multi_sint(double %c, i32* nocapture %p, i32* nocapture %q) { ; CHECK: @ %bb.0: ; CHECK-NEXT: vmov d16, r0, r1 ; CHECK-NEXT: vcvt.s32.f64 s0, d16 +; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: vstr s0, [r2] -; CHECK-NEXT: vcvt.s32.f64 s0, d16 -; CHECK-NEXT: vcvt.s32.f64 s2, d16 -; CHECK-NEXT: vmov r0, s2 ; CHECK-NEXT: vstr s0, [r3] ; CHECK-NEXT: mov pc, lr %conv = fptosi double %c to i32 @@ -372,10 +370,8 @@ define i32 @multi_uint(double %c, i32* nocapture %p, i32* nocapture %q) { ; CHECK: @ %bb.0: ; CHECK-NEXT: vmov d16, r0, r1 ; CHECK-NEXT: vcvt.u32.f64 s0, d16 +; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: vstr s0, [r2] -; CHECK-NEXT: vcvt.u32.f64 s0, d16 -; CHECK-NEXT: vcvt.u32.f64 s2, d16 -; CHECK-NEXT: vmov r0, s2 ; CHECK-NEXT: vstr s0, [r3] ; CHECK-NEXT: mov pc, lr %conv = fptoui double %c to i32 diff --git a/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll b/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll index b64c2378949..e69610f9df2 100644 --- a/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll +++ 
b/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll @@ -710,10 +710,10 @@ define arm_aapcs_vfpcc void @float_int_mul(float* nocapture readonly %a, i32* no ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr r4, [r6], #4 ; CHECK-NEXT: add.w r12, r12, #1 -; CHECK-NEXT: vmov s0, r4 -; CHECK-NEXT: vcvt.f32.s32 s0, s0 ; CHECK-NEXT: vldr s2, [r5] ; CHECK-NEXT: adds r5, #4 +; CHECK-NEXT: vmov s0, r4 +; CHECK-NEXT: vcvt.f32.s32 s0, s0 ; CHECK-NEXT: vmul.f32 s0, s2, s0 ; CHECK-NEXT: vstr s0, [r7] ; CHECK-NEXT: adds r7, #4 @@ -739,19 +739,19 @@ define arm_aapcs_vfpcc void @float_int_mul(float* nocapture readonly %a, i32* no ; CHECK-NEXT: vmul.f32 s0, s2, s0 ; CHECK-NEXT: vstr s0, [r6] ; CHECK-NEXT: vldr s0, [r1, #-4] -; CHECK-NEXT: vcvt.f32.s32 s0, s0 ; CHECK-NEXT: vldr s2, [r7, #4] +; CHECK-NEXT: vcvt.f32.s32 s0, s0 ; CHECK-NEXT: vmul.f32 s0, s2, s0 ; CHECK-NEXT: vstr s0, [r6, #4] ; CHECK-NEXT: vldr s0, [r1] -; CHECK-NEXT: vcvt.f32.s32 s0, s0 ; CHECK-NEXT: vldr s2, [r7, #8] +; CHECK-NEXT: vcvt.f32.s32 s0, s0 ; CHECK-NEXT: vmul.f32 s0, s2, s0 ; CHECK-NEXT: vstr s0, [r6, #8] ; CHECK-NEXT: vldr s0, [r1, #4] ; CHECK-NEXT: add.w r1, r1, #16 -; CHECK-NEXT: vcvt.f32.s32 s0, s0 ; CHECK-NEXT: vldr s2, [r7, #12] +; CHECK-NEXT: vcvt.f32.s32 s0, s0 ; CHECK-NEXT: vmul.f32 s0, s2, s0 ; CHECK-NEXT: vstr s0, [r6, #12] ; CHECK-NEXT: bne .LBB3_12 @@ -1490,18 +1490,18 @@ define arm_aapcs_vfpcc float @half_half_mac(half* nocapture readonly %a, half* n ; CHECK-NEXT: vmul.f16 s2, s4, s2 ; CHECK-NEXT: vldr.16 s4, [r2, #4] ; CHECK-NEXT: vldr.16 s10, [r4] -; CHECK-NEXT: adds r3, #8 +; CHECK-NEXT: vcvtb.f32.f16 s2, s2 ; CHECK-NEXT: vmul.f16 s4, s6, s4 ; CHECK-NEXT: vldr.16 s6, [r2, #2] -; CHECK-NEXT: add.w r12, r12, #4 +; CHECK-NEXT: vcvtb.f32.f16 s4, s4 +; CHECK-NEXT: adds r3, #8 ; CHECK-NEXT: vmul.f16 s6, s8, s6 ; CHECK-NEXT: vldr.16 s8, [r2] +; CHECK-NEXT: vcvtb.f32.f16 s6, s6 +; CHECK-NEXT: add.w r12, r12, #4 ; CHECK-NEXT: vmul.f16 s8, s10, s8 ; CHECK-NEXT: vcvtb.f32.f16 
s8, s8 -; CHECK-NEXT: vcvtb.f32.f16 s6, s6 ; CHECK-NEXT: vadd.f32 s0, s0, s8 -; CHECK-NEXT: vcvtb.f32.f16 s4, s4 -; CHECK-NEXT: vcvtb.f32.f16 s2, s2 ; CHECK-NEXT: vadd.f32 s0, s0, s6 ; CHECK-NEXT: vadd.f32 s0, s0, s4 ; CHECK-NEXT: vadd.f32 s0, s0, s2 @@ -1647,18 +1647,18 @@ define arm_aapcs_vfpcc float @half_half_acc(half* nocapture readonly %a, half* n ; CHECK-NEXT: vadd.f16 s2, s4, s2 ; CHECK-NEXT: vldr.16 s4, [r2, #4] ; CHECK-NEXT: vldr.16 s10, [r4] -; CHECK-NEXT: adds r3, #8 +; CHECK-NEXT: vcvtb.f32.f16 s2, s2 ; CHECK-NEXT: vadd.f16 s4, s6, s4 ; CHECK-NEXT: vldr.16 s6, [r2, #2] -; CHECK-NEXT: add.w r12, r12, #4 +; CHECK-NEXT: vcvtb.f32.f16 s4, s4 +; CHECK-NEXT: adds r3, #8 ; CHECK-NEXT: vadd.f16 s6, s8, s6 ; CHECK-NEXT: vldr.16 s8, [r2] +; CHECK-NEXT: vcvtb.f32.f16 s6, s6 +; CHECK-NEXT: add.w r12, r12, #4 ; CHECK-NEXT: vadd.f16 s8, s10, s8 ; CHECK-NEXT: vcvtb.f32.f16 s8, s8 -; CHECK-NEXT: vcvtb.f32.f16 s6, s6 ; CHECK-NEXT: vadd.f32 s0, s0, s8 -; CHECK-NEXT: vcvtb.f32.f16 s4, s4 -; CHECK-NEXT: vcvtb.f32.f16 s2, s2 ; CHECK-NEXT: vadd.f32 s0, s0, s6 ; CHECK-NEXT: vadd.f32 s0, s0, s4 ; CHECK-NEXT: vadd.f32 s0, s0, s2 @@ -1798,32 +1798,32 @@ define arm_aapcs_vfpcc float @half_short_mac(half* nocapture readonly %a, i16* n ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldrsh.w r4, [r3, #2] ; CHECK-NEXT: vldr.16 s2, [r2, #2] +; CHECK-NEXT: ldrsh r5, [r3, #-2] ; CHECK-NEXT: add.w r12, r12, #4 ; CHECK-NEXT: vmov s4, r4 -; CHECK-NEXT: vcvt.f16.s32 s4, s4 ; CHECK-NEXT: ldrsh.w r4, [r3] +; CHECK-NEXT: vcvt.f16.s32 s4, s4 +; CHECK-NEXT: vmov s8, r5 ; CHECK-NEXT: vmul.f16 s2, s2, s4 ; CHECK-NEXT: vldr.16 s4, [r2] ; CHECK-NEXT: vmov s6, r4 -; CHECK-NEXT: vcvt.f16.s32 s6, s6 -; CHECK-NEXT: ldrsh r5, [r3, #-2] ; CHECK-NEXT: ldrsh r4, [r3, #-4] +; CHECK-NEXT: vcvt.f16.s32 s6, s6 +; CHECK-NEXT: vcvt.f16.s32 s8, s8 ; CHECK-NEXT: vmul.f16 s4, s4, s6 ; CHECK-NEXT: vldr.16 s6, [r2, #-2] -; CHECK-NEXT: adds r3, #8 -; CHECK-NEXT: vmov s8, r5 -; CHECK-NEXT: vcvt.f16.s32 
s8, s8 ; CHECK-NEXT: vmov s10, r4 +; CHECK-NEXT: vcvtb.f32.f16 s4, s4 ; CHECK-NEXT: vmul.f16 s6, s6, s8 ; CHECK-NEXT: vldr.16 s8, [r2, #-4] ; CHECK-NEXT: vcvt.f16.s32 s10, s10 -; CHECK-NEXT: adds r2, #8 -; CHECK-NEXT: vmul.f16 s8, s8, s10 -; CHECK-NEXT: vcvtb.f32.f16 s8, s8 ; CHECK-NEXT: vcvtb.f32.f16 s6, s6 -; CHECK-NEXT: vadd.f32 s0, s0, s8 -; CHECK-NEXT: vcvtb.f32.f16 s4, s4 +; CHECK-NEXT: vmul.f16 s8, s8, s10 ; CHECK-NEXT: vcvtb.f32.f16 s2, s2 +; CHECK-NEXT: vcvtb.f32.f16 s8, s8 +; CHECK-NEXT: adds r3, #8 +; CHECK-NEXT: vadd.f32 s0, s0, s8 +; CHECK-NEXT: adds r2, #8 ; CHECK-NEXT: vadd.f32 s0, s0, s6 ; CHECK-NEXT: vadd.f32 s0, s0, s4 ; CHECK-NEXT: vadd.f32 s0, s0, s2 diff --git a/test/CodeGen/Thumb2/mve-masked-store.ll b/test/CodeGen/Thumb2/mve-masked-store.ll index f2c51daef7c..3a905296fb4 100644 --- a/test/CodeGen/Thumb2/mve-masked-store.ll +++ b/test/CodeGen/Thumb2/mve-masked-store.ll @@ -1223,13 +1223,9 @@ define arm_aapcs_vfpcc void @masked_v4f16_align4(<4 x half> *%dest, <4 x float> ; CHECK-LE: @ %bb.0: @ %entry ; CHECK-LE-NEXT: .pad #4 ; CHECK-LE-NEXT: sub sp, #4 -; CHECK-LE-NEXT: vcvtb.f16.f32 s4, s0 ; CHECK-LE-NEXT: vcmp.f32 s0, #0 -; CHECK-LE-NEXT: vcvtt.f16.f32 s4, s1 +; CHECK-LE-NEXT: movs r1, #0 ; CHECK-LE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-LE-NEXT: vcvtb.f16.f32 s5, s2 -; CHECK-LE-NEXT: mov.w r1, #0 -; CHECK-LE-NEXT: vcvtt.f16.f32 s5, s3 ; CHECK-LE-NEXT: it gt ; CHECK-LE-NEXT: movgt r1, #1 ; CHECK-LE-NEXT: cmp r1, #0 @@ -1265,9 +1261,13 @@ define arm_aapcs_vfpcc void @masked_v4f16_align4(<4 x half> *%dest, <4 x float> ; CHECK-LE-NEXT: cset r2, ne ; CHECK-LE-NEXT: and r2, r2, #1 ; CHECK-LE-NEXT: rsbs r3, r3, #0 +; CHECK-LE-NEXT: vcvtb.f16.f32 s4, s0 ; CHECK-LE-NEXT: bfi r1, r3, #2, #1 ; CHECK-LE-NEXT: rsbs r2, r2, #0 +; CHECK-LE-NEXT: vcvtt.f16.f32 s4, s1 ; CHECK-LE-NEXT: bfi r1, r2, #3, #1 +; CHECK-LE-NEXT: vcvtb.f16.f32 s5, s2 +; CHECK-LE-NEXT: vcvtt.f16.f32 s5, s3 ; CHECK-LE-NEXT: lsls r2, r1, #31 ; CHECK-LE-NEXT: bne .LBB25_5 ; CHECK-LE-NEXT: 
@ %bb.1: @ %else @@ -1307,17 +1307,13 @@ define arm_aapcs_vfpcc void @masked_v4f16_align4(<4 x half> *%dest, <4 x float> ; CHECK-BE-NEXT: sub sp, #4 ; CHECK-BE-NEXT: vrev64.32 q1, q0 ; CHECK-BE-NEXT: movs r1, #0 -; CHECK-BE-NEXT: vcvtb.f16.f32 s0, s4 ; CHECK-BE-NEXT: vcmp.f32 s4, #0 -; CHECK-BE-NEXT: vcvtt.f16.f32 s0, s5 +; CHECK-BE-NEXT: movs r2, #0 ; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-BE-NEXT: vcvtb.f16.f32 s1, s6 -; CHECK-BE-NEXT: vcmp.f32 s5, #0 -; CHECK-BE-NEXT: vcvtt.f16.f32 s1, s7 ; CHECK-BE-NEXT: it gt ; CHECK-BE-NEXT: movgt r1, #1 ; CHECK-BE-NEXT: cmp r1, #0 -; CHECK-BE-NEXT: mov.w r2, #0 +; CHECK-BE-NEXT: vcmp.f32 s5, #0 ; CHECK-BE-NEXT: cset r1, ne ; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-BE-NEXT: and r1, r1, #1 @@ -1334,6 +1330,7 @@ define arm_aapcs_vfpcc void @masked_v4f16_align4(<4 x half> *%dest, <4 x float> ; CHECK-BE-NEXT: and r3, r3, #1 ; CHECK-BE-NEXT: vcmp.f32 s7, #0 ; CHECK-BE-NEXT: rsb.w r3, r3, #0 +; CHECK-BE-NEXT: vcvtb.f16.f32 s0, s4 ; CHECK-BE-NEXT: bfi r1, r3, #1, #1 ; CHECK-BE-NEXT: mov.w r3, #0 ; CHECK-BE-NEXT: it gt @@ -1350,7 +1347,10 @@ define arm_aapcs_vfpcc void @masked_v4f16_align4(<4 x half> *%dest, <4 x float> ; CHECK-BE-NEXT: rsbs r3, r3, #0 ; CHECK-BE-NEXT: bfi r1, r3, #2, #1 ; CHECK-BE-NEXT: rsbs r2, r2, #0 +; CHECK-BE-NEXT: vcvtt.f16.f32 s0, s5 ; CHECK-BE-NEXT: bfi r1, r2, #3, #1 +; CHECK-BE-NEXT: vcvtb.f16.f32 s1, s6 +; CHECK-BE-NEXT: vcvtt.f16.f32 s1, s7 ; CHECK-BE-NEXT: lsls r2, r1, #31 ; CHECK-BE-NEXT: bne .LBB25_5 ; CHECK-BE-NEXT: @ %bb.1: @ %else @@ -1395,13 +1395,9 @@ define arm_aapcs_vfpcc void @masked_v4f16_align2(<4 x half> *%dest, <4 x float> ; CHECK-LE: @ %bb.0: @ %entry ; CHECK-LE-NEXT: .pad #4 ; CHECK-LE-NEXT: sub sp, #4 -; CHECK-LE-NEXT: vcvtb.f16.f32 s4, s0 ; CHECK-LE-NEXT: vcmp.f32 s0, #0 -; CHECK-LE-NEXT: vcvtt.f16.f32 s4, s1 +; CHECK-LE-NEXT: movs r1, #0 ; CHECK-LE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-LE-NEXT: vcvtb.f16.f32 s5, s2 -; CHECK-LE-NEXT: mov.w r1, #0 -; CHECK-LE-NEXT: 
vcvtt.f16.f32 s5, s3 ; CHECK-LE-NEXT: it gt ; CHECK-LE-NEXT: movgt r1, #1 ; CHECK-LE-NEXT: cmp r1, #0 @@ -1437,9 +1433,13 @@ define arm_aapcs_vfpcc void @masked_v4f16_align2(<4 x half> *%dest, <4 x float> ; CHECK-LE-NEXT: cset r2, ne ; CHECK-LE-NEXT: and r2, r2, #1 ; CHECK-LE-NEXT: rsbs r3, r3, #0 +; CHECK-LE-NEXT: vcvtb.f16.f32 s4, s0 ; CHECK-LE-NEXT: bfi r1, r3, #2, #1 ; CHECK-LE-NEXT: rsbs r2, r2, #0 +; CHECK-LE-NEXT: vcvtt.f16.f32 s4, s1 ; CHECK-LE-NEXT: bfi r1, r2, #3, #1 +; CHECK-LE-NEXT: vcvtb.f16.f32 s5, s2 +; CHECK-LE-NEXT: vcvtt.f16.f32 s5, s3 ; CHECK-LE-NEXT: lsls r2, r1, #31 ; CHECK-LE-NEXT: bne .LBB26_5 ; CHECK-LE-NEXT: @ %bb.1: @ %else @@ -1479,17 +1479,13 @@ define arm_aapcs_vfpcc void @masked_v4f16_align2(<4 x half> *%dest, <4 x float> ; CHECK-BE-NEXT: sub sp, #4 ; CHECK-BE-NEXT: vrev64.32 q1, q0 ; CHECK-BE-NEXT: movs r1, #0 -; CHECK-BE-NEXT: vcvtb.f16.f32 s0, s4 ; CHECK-BE-NEXT: vcmp.f32 s4, #0 -; CHECK-BE-NEXT: vcvtt.f16.f32 s0, s5 +; CHECK-BE-NEXT: movs r2, #0 ; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-BE-NEXT: vcvtb.f16.f32 s1, s6 -; CHECK-BE-NEXT: vcmp.f32 s5, #0 -; CHECK-BE-NEXT: vcvtt.f16.f32 s1, s7 ; CHECK-BE-NEXT: it gt ; CHECK-BE-NEXT: movgt r1, #1 ; CHECK-BE-NEXT: cmp r1, #0 -; CHECK-BE-NEXT: mov.w r2, #0 +; CHECK-BE-NEXT: vcmp.f32 s5, #0 ; CHECK-BE-NEXT: cset r1, ne ; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-BE-NEXT: and r1, r1, #1 @@ -1506,6 +1502,7 @@ define arm_aapcs_vfpcc void @masked_v4f16_align2(<4 x half> *%dest, <4 x float> ; CHECK-BE-NEXT: and r3, r3, #1 ; CHECK-BE-NEXT: vcmp.f32 s7, #0 ; CHECK-BE-NEXT: rsb.w r3, r3, #0 +; CHECK-BE-NEXT: vcvtb.f16.f32 s0, s4 ; CHECK-BE-NEXT: bfi r1, r3, #1, #1 ; CHECK-BE-NEXT: mov.w r3, #0 ; CHECK-BE-NEXT: it gt @@ -1522,7 +1519,10 @@ define arm_aapcs_vfpcc void @masked_v4f16_align2(<4 x half> *%dest, <4 x float> ; CHECK-BE-NEXT: rsbs r3, r3, #0 ; CHECK-BE-NEXT: bfi r1, r3, #2, #1 ; CHECK-BE-NEXT: rsbs r2, r2, #0 +; CHECK-BE-NEXT: vcvtt.f16.f32 s0, s5 ; CHECK-BE-NEXT: bfi r1, r2, #3, #1 
+; CHECK-BE-NEXT: vcvtb.f16.f32 s1, s6 +; CHECK-BE-NEXT: vcvtt.f16.f32 s1, s7 ; CHECK-BE-NEXT: lsls r2, r1, #31 ; CHECK-BE-NEXT: bne .LBB26_5 ; CHECK-BE-NEXT: @ %bb.1: @ %else @@ -1567,13 +1567,9 @@ define arm_aapcs_vfpcc void @masked_v4f16_align1(<4 x half> *%dest, <4 x float> ; CHECK-LE: @ %bb.0: @ %entry ; CHECK-LE-NEXT: .pad #20 ; CHECK-LE-NEXT: sub sp, #20 -; CHECK-LE-NEXT: vcvtb.f16.f32 s4, s0 ; CHECK-LE-NEXT: vcmp.f32 s0, #0 -; CHECK-LE-NEXT: vcvtt.f16.f32 s4, s1 +; CHECK-LE-NEXT: movs r1, #0 ; CHECK-LE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-LE-NEXT: vcvtb.f16.f32 s5, s2 -; CHECK-LE-NEXT: mov.w r1, #0 -; CHECK-LE-NEXT: vcvtt.f16.f32 s5, s3 ; CHECK-LE-NEXT: it gt ; CHECK-LE-NEXT: movgt r1, #1 ; CHECK-LE-NEXT: cmp r1, #0 @@ -1609,9 +1605,13 @@ define arm_aapcs_vfpcc void @masked_v4f16_align1(<4 x half> *%dest, <4 x float> ; CHECK-LE-NEXT: cset r2, ne ; CHECK-LE-NEXT: and r2, r2, #1 ; CHECK-LE-NEXT: rsbs r3, r3, #0 +; CHECK-LE-NEXT: vcvtb.f16.f32 s4, s0 ; CHECK-LE-NEXT: bfi r1, r3, #2, #1 ; CHECK-LE-NEXT: rsbs r2, r2, #0 +; CHECK-LE-NEXT: vcvtt.f16.f32 s4, s1 ; CHECK-LE-NEXT: bfi r1, r2, #3, #1 +; CHECK-LE-NEXT: vcvtb.f16.f32 s5, s2 +; CHECK-LE-NEXT: vcvtt.f16.f32 s5, s3 ; CHECK-LE-NEXT: lsls r2, r1, #31 ; CHECK-LE-NEXT: bne .LBB27_5 ; CHECK-LE-NEXT: @ %bb.1: @ %else @@ -1659,17 +1659,13 @@ define arm_aapcs_vfpcc void @masked_v4f16_align1(<4 x half> *%dest, <4 x float> ; CHECK-BE-NEXT: sub sp, #20 ; CHECK-BE-NEXT: vrev64.32 q1, q0 ; CHECK-BE-NEXT: movs r1, #0 -; CHECK-BE-NEXT: vcvtb.f16.f32 s0, s4 ; CHECK-BE-NEXT: vcmp.f32 s4, #0 -; CHECK-BE-NEXT: vcvtt.f16.f32 s0, s5 +; CHECK-BE-NEXT: movs r2, #0 ; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-BE-NEXT: vcvtb.f16.f32 s1, s6 -; CHECK-BE-NEXT: vcmp.f32 s5, #0 -; CHECK-BE-NEXT: vcvtt.f16.f32 s1, s7 ; CHECK-BE-NEXT: it gt ; CHECK-BE-NEXT: movgt r1, #1 ; CHECK-BE-NEXT: cmp r1, #0 -; CHECK-BE-NEXT: mov.w r2, #0 +; CHECK-BE-NEXT: vcmp.f32 s5, #0 ; CHECK-BE-NEXT: cset r1, ne ; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr ; 
CHECK-BE-NEXT: and r1, r1, #1 @@ -1686,6 +1682,7 @@ define arm_aapcs_vfpcc void @masked_v4f16_align1(<4 x half> *%dest, <4 x float> ; CHECK-BE-NEXT: and r3, r3, #1 ; CHECK-BE-NEXT: vcmp.f32 s7, #0 ; CHECK-BE-NEXT: rsb.w r3, r3, #0 +; CHECK-BE-NEXT: vcvtb.f16.f32 s0, s4 ; CHECK-BE-NEXT: bfi r1, r3, #1, #1 ; CHECK-BE-NEXT: mov.w r3, #0 ; CHECK-BE-NEXT: it gt @@ -1702,7 +1699,10 @@ define arm_aapcs_vfpcc void @masked_v4f16_align1(<4 x half> *%dest, <4 x float> ; CHECK-BE-NEXT: rsbs r3, r3, #0 ; CHECK-BE-NEXT: bfi r1, r3, #2, #1 ; CHECK-BE-NEXT: rsbs r2, r2, #0 +; CHECK-BE-NEXT: vcvtt.f16.f32 s0, s5 ; CHECK-BE-NEXT: bfi r1, r2, #3, #1 +; CHECK-BE-NEXT: vcvtb.f16.f32 s1, s6 +; CHECK-BE-NEXT: vcvtt.f16.f32 s1, s7 ; CHECK-BE-NEXT: lsls r2, r1, #31 ; CHECK-BE-NEXT: bne .LBB27_5 ; CHECK-BE-NEXT: @ %bb.1: @ %else diff --git a/test/CodeGen/Thumb2/mve-vcvt.ll b/test/CodeGen/Thumb2/mve-vcvt.ll index 0cfe5cc80c1..ddc36f3be58 100644 --- a/test/CodeGen/Thumb2/mve-vcvt.ll +++ b/test/CodeGen/Thumb2/mve-vcvt.ll @@ -213,31 +213,31 @@ entry: define arm_aapcs_vfpcc <8 x i16> @foo_int16_half(<8 x half> %src) { ; CHECK-MVE-LABEL: foo_int16_half: ; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: vmovx.f16 s14, s0 +; CHECK-MVE-NEXT: vcvt.s32.f16 s0, s0 +; CHECK-MVE-NEXT: vcvt.s32.f16 s14, s14 +; CHECK-MVE-NEXT: vmov r0, s0 ; CHECK-MVE-NEXT: vmovx.f16 s4, s3 ; CHECK-MVE-NEXT: vmovx.f16 s6, s2 ; CHECK-MVE-NEXT: vmovx.f16 s10, s1 -; CHECK-MVE-NEXT: vmovx.f16 s14, s0 -; CHECK-MVE-NEXT: vcvt.s32.f16 s4, s4 -; CHECK-MVE-NEXT: vcvt.s32.f16 s6, s6 ; CHECK-MVE-NEXT: vcvt.s32.f16 s8, s3 -; CHECK-MVE-NEXT: vcvt.s32.f16 s10, s10 ; CHECK-MVE-NEXT: vcvt.s32.f16 s12, s2 -; CHECK-MVE-NEXT: vcvt.s32.f16 s14, s14 ; CHECK-MVE-NEXT: vcvt.s32.f16 s5, s1 -; CHECK-MVE-NEXT: vcvt.s32.f16 s0, s0 -; CHECK-MVE-NEXT: vmov r0, s0 ; CHECK-MVE-NEXT: vmov.16 q0[0], r0 ; CHECK-MVE-NEXT: vmov r0, s14 ; CHECK-MVE-NEXT: vmov.16 q0[1], r0 ; CHECK-MVE-NEXT: vmov r0, s5 +; CHECK-MVE-NEXT: vcvt.s32.f16 s10, s10 ; CHECK-MVE-NEXT: 
vmov.16 q0[2], r0 ; CHECK-MVE-NEXT: vmov r0, s10 +; CHECK-MVE-NEXT: vcvt.s32.f16 s6, s6 ; CHECK-MVE-NEXT: vmov.16 q0[3], r0 ; CHECK-MVE-NEXT: vmov r0, s12 ; CHECK-MVE-NEXT: vmov.16 q0[4], r0 ; CHECK-MVE-NEXT: vmov r0, s6 ; CHECK-MVE-NEXT: vmov.16 q0[5], r0 ; CHECK-MVE-NEXT: vmov r0, s8 +; CHECK-MVE-NEXT: vcvt.s32.f16 s4, s4 ; CHECK-MVE-NEXT: vmov.16 q0[6], r0 ; CHECK-MVE-NEXT: vmov r0, s4 ; CHECK-MVE-NEXT: vmov.16 q0[7], r0 @@ -255,31 +255,31 @@ entry: define arm_aapcs_vfpcc <8 x i16> @foo_uint16_half(<8 x half> %src) { ; CHECK-MVE-LABEL: foo_uint16_half: ; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: vmovx.f16 s14, s0 +; CHECK-MVE-NEXT: vcvt.s32.f16 s0, s0 +; CHECK-MVE-NEXT: vcvt.s32.f16 s14, s14 +; CHECK-MVE-NEXT: vmov r0, s0 ; CHECK-MVE-NEXT: vmovx.f16 s4, s3 ; CHECK-MVE-NEXT: vmovx.f16 s6, s2 ; CHECK-MVE-NEXT: vmovx.f16 s10, s1 -; CHECK-MVE-NEXT: vmovx.f16 s14, s0 -; CHECK-MVE-NEXT: vcvt.s32.f16 s4, s4 -; CHECK-MVE-NEXT: vcvt.s32.f16 s6, s6 ; CHECK-MVE-NEXT: vcvt.s32.f16 s8, s3 -; CHECK-MVE-NEXT: vcvt.s32.f16 s10, s10 ; CHECK-MVE-NEXT: vcvt.s32.f16 s12, s2 -; CHECK-MVE-NEXT: vcvt.s32.f16 s14, s14 ; CHECK-MVE-NEXT: vcvt.s32.f16 s5, s1 -; CHECK-MVE-NEXT: vcvt.s32.f16 s0, s0 -; CHECK-MVE-NEXT: vmov r0, s0 ; CHECK-MVE-NEXT: vmov.16 q0[0], r0 ; CHECK-MVE-NEXT: vmov r0, s14 ; CHECK-MVE-NEXT: vmov.16 q0[1], r0 ; CHECK-MVE-NEXT: vmov r0, s5 +; CHECK-MVE-NEXT: vcvt.s32.f16 s10, s10 ; CHECK-MVE-NEXT: vmov.16 q0[2], r0 ; CHECK-MVE-NEXT: vmov r0, s10 +; CHECK-MVE-NEXT: vcvt.s32.f16 s6, s6 ; CHECK-MVE-NEXT: vmov.16 q0[3], r0 ; CHECK-MVE-NEXT: vmov r0, s12 ; CHECK-MVE-NEXT: vmov.16 q0[4], r0 ; CHECK-MVE-NEXT: vmov r0, s6 ; CHECK-MVE-NEXT: vmov.16 q0[5], r0 ; CHECK-MVE-NEXT: vmov r0, s8 +; CHECK-MVE-NEXT: vcvt.s32.f16 s4, s4 ; CHECK-MVE-NEXT: vmov.16 q0[6], r0 ; CHECK-MVE-NEXT: vmov r0, s4 ; CHECK-MVE-NEXT: vmov.16 q0[7], r0 diff --git a/test/CodeGen/Thumb2/mve-vcvt16.ll b/test/CodeGen/Thumb2/mve-vcvt16.ll index 86cc21d05f9..c5421feb6c9 100644 --- 
a/test/CodeGen/Thumb2/mve-vcvt16.ll +++ b/test/CodeGen/Thumb2/mve-vcvt16.ll @@ -18,15 +18,15 @@ entry: define arm_aapcs_vfpcc <8 x float> @fpext_8(<8 x half> %src1) { ; CHECK-LABEL: fpext_8: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vcvtt.f32.f16 s11, s1 -; CHECK-NEXT: vcvtb.f32.f16 s10, s1 -; CHECK-NEXT: vcvtt.f32.f16 s9, s0 -; CHECK-NEXT: vcvtb.f32.f16 s8, s0 -; CHECK-NEXT: vcvtt.f32.f16 s7, s3 -; CHECK-NEXT: vcvtb.f32.f16 s6, s3 -; CHECK-NEXT: vcvtt.f32.f16 s5, s2 -; CHECK-NEXT: vcvtb.f32.f16 s4, s2 -; CHECK-NEXT: vmov q0, q2 +; CHECK-NEXT: vmov q2, q0 +; CHECK-NEXT: vcvtt.f32.f16 s3, s9 +; CHECK-NEXT: vcvtt.f32.f16 s7, s11 +; CHECK-NEXT: vcvtb.f32.f16 s2, s9 +; CHECK-NEXT: vcvtb.f32.f16 s6, s11 +; CHECK-NEXT: vcvtt.f32.f16 s1, s8 +; CHECK-NEXT: vcvtt.f32.f16 s5, s10 +; CHECK-NEXT: vcvtb.f32.f16 s0, s8 +; CHECK-NEXT: vcvtb.f32.f16 s4, s10 ; CHECK-NEXT: bx lr entry: %out = fpext <8 x half> %src1 to <8 x float> @@ -247,12 +247,12 @@ define arm_aapcs_vfpcc <8 x float> @load_shuffleext_16(<16 x half>* %src) { ; CHECK-NEXT: vld20.16 {q2, q3}, [r0] ; CHECK-NEXT: vld21.16 {q2, q3}, [r0] ; CHECK-NEXT: vcvtt.f32.f16 s3, s9 -; CHECK-NEXT: vcvtb.f32.f16 s2, s9 -; CHECK-NEXT: vcvtt.f32.f16 s1, s8 -; CHECK-NEXT: vcvtb.f32.f16 s0, s8 ; CHECK-NEXT: vcvtt.f32.f16 s7, s11 +; CHECK-NEXT: vcvtb.f32.f16 s2, s9 ; CHECK-NEXT: vcvtb.f32.f16 s6, s11 +; CHECK-NEXT: vcvtt.f32.f16 s1, s8 ; CHECK-NEXT: vcvtt.f32.f16 s5, s10 +; CHECK-NEXT: vcvtb.f32.f16 s0, s8 ; CHECK-NEXT: vcvtb.f32.f16 s4, s10 ; CHECK-NEXT: bx lr entry: diff --git a/unittests/Target/ARM/MachineInstrTest.cpp b/unittests/Target/ARM/MachineInstrTest.cpp index 51c9d739f91..3e5403f2098 100644 --- a/unittests/Target/ARM/MachineInstrTest.cpp +++ b/unittests/Target/ARM/MachineInstrTest.cpp @@ -973,13 +973,110 @@ TEST(MachineInstrValidTailPredication, IsCorrect) { TEST(MachineInstr, HasSideEffects) { using namespace ARM; std::set UnpredictableOpcodes = { - MVE_VCTP8, MVE_VCTP16, MVE_VCTP32, MVE_VCTP64, MVE_VPST, - MVE_VPTv16i8, 
MVE_VPTv8i16, MVE_VPTv4i32, MVE_VPTv16i8r, MVE_VPTv8i16r, - MVE_VPTv4i32r, MVE_VPTv16s8, MVE_VPTv8s16, MVE_VPTv4s32, MVE_VPTv16s8r, - MVE_VPTv8s16r, MVE_VPTv4s32r, MVE_VPTv16u8, MVE_VPTv8u16, MVE_VPTv4u32, - MVE_VPTv16u8r, MVE_VPTv8u16r, MVE_VPTv4u32r, MVE_VPTv8f16, MVE_VPTv4f32, - MVE_VPTv8f16r, MVE_VPTv4f32r, MVE_VADC, MVE_VADCI, MVE_VSBC, - MVE_VSBCI, MVE_VSHLC, + // MVE Instructions + MVE_VCTP8, + MVE_VCTP16, + MVE_VCTP32, + MVE_VCTP64, + MVE_VPST, + MVE_VPTv16i8, + MVE_VPTv8i16, + MVE_VPTv4i32, + MVE_VPTv16i8r, + MVE_VPTv8i16r, + MVE_VPTv4i32r, + MVE_VPTv16s8, + MVE_VPTv8s16, + MVE_VPTv4s32, + MVE_VPTv16s8r, + MVE_VPTv8s16r, + MVE_VPTv4s32r, + MVE_VPTv16u8, + MVE_VPTv8u16, + MVE_VPTv4u32, + MVE_VPTv16u8r, + MVE_VPTv8u16r, + MVE_VPTv4u32r, + MVE_VPTv8f16, + MVE_VPTv4f32, + MVE_VPTv8f16r, + MVE_VPTv4f32r, + MVE_VADC, + MVE_VADCI, + MVE_VSBC, + MVE_VSBCI, + MVE_VSHLC, + // FP Instructions + FLDMXIA, + FLDMXDB_UPD, + FLDMXIA_UPD, + FSTMXDB_UPD, + FSTMXIA, + FSTMXIA_UPD, + VLDR_FPCXTNS_off, + VLDR_FPCXTNS_post, + VLDR_FPCXTNS_pre, + VLDR_FPCXTS_off, + VLDR_FPCXTS_post, + VLDR_FPCXTS_pre, + VLDR_FPSCR_NZCVQC_off, + VLDR_FPSCR_NZCVQC_post, + VLDR_FPSCR_NZCVQC_pre, + VLDR_FPSCR_off, + VLDR_FPSCR_post, + VLDR_FPSCR_pre, + VLDR_P0_off, + VLDR_P0_post, + VLDR_P0_pre, + VLDR_VPR_off, + VLDR_VPR_post, + VLDR_VPR_pre, + VLLDM, + VLSTM, + VMRS, + VMRS_FPCXTNS, + VMRS_FPCXTS, + VMRS_FPEXC, + VMRS_FPINST, + VMRS_FPINST2, + VMRS_FPSCR_NZCVQC, + VMRS_FPSID, + VMRS_MVFR0, + VMRS_MVFR1, + VMRS_MVFR2, + VMRS_P0, + VMRS_VPR, + VMSR, + VMSR_FPCXTNS, + VMSR_FPCXTS, + VMSR_FPEXC, + VMSR_FPINST, + VMSR_FPINST2, + VMSR_FPSCR_NZCVQC, + VMSR_FPSID, + VMSR_P0, + VMSR_VPR, + VSCCLRMD, + VSCCLRMS, + VSTR_FPCXTNS_off, + VSTR_FPCXTNS_post, + VSTR_FPCXTNS_pre, + VSTR_FPCXTS_off, + VSTR_FPCXTS_post, + VSTR_FPCXTS_pre, + VSTR_FPSCR_NZCVQC_off, + VSTR_FPSCR_NZCVQC_post, + VSTR_FPSCR_NZCVQC_pre, + VSTR_FPSCR_off, + VSTR_FPSCR_post, + VSTR_FPSCR_pre, + VSTR_P0_off, + VSTR_P0_post, 
+ VSTR_P0_pre, + VSTR_VPR_off, + VSTR_VPR_post, + VSTR_VPR_pre, }; LLVMInitializeARMTargetInfo(); @@ -1006,7 +1103,8 @@ TEST(MachineInstr, HasSideEffects) { for (unsigned Op = 0; Op < ARM::INSTRUCTION_LIST_END; ++Op) { const MCInstrDesc &Desc = TII->get(Op); - if ((Desc.TSFlags & ARMII::DomainMask) != ARMII::DomainMVE) + if ((Desc.TSFlags & ARMII::DomainMask) != ARMII::DomainMVE && + (Desc.TSFlags & ARMII::DomainMask) != ARMII::DomainVFP) continue; if (UnpredictableOpcodes.count(Op)) continue;