mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 04:02:41 +01:00
Fix support to use NEON for single precision fp math.
llvm-svn: 78397
This commit is contained in:
parent
7ded8b7bdf
commit
0dab4cc8a0
@ -587,7 +587,7 @@ ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case ARM::FSTD:
|
case ARM::FSTD:
|
||||||
case ARM::FSTS:
|
case ARM::FSTS:
|
||||||
if (MI->getOperand(1).isFI() &&
|
if (MI->getOperand(1).isFI() &&
|
||||||
MI->getOperand(2).isImm() &&
|
MI->getOperand(2).isImm() &&
|
||||||
MI->getOperand(2).getImm() == 0) {
|
MI->getOperand(2).getImm() == 0) {
|
||||||
@ -610,8 +610,10 @@ ARMBaseInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
|
|||||||
if (I != MBB.end()) DL = I->getDebugLoc();
|
if (I != MBB.end()) DL = I->getDebugLoc();
|
||||||
|
|
||||||
if (DestRC != SrcRC) {
|
if (DestRC != SrcRC) {
|
||||||
if (((DestRC == ARM::DPRRegisterClass) && (SrcRC == ARM::DPR_VFP2RegisterClass)) ||
|
if (((DestRC == ARM::DPRRegisterClass) &&
|
||||||
((SrcRC == ARM::DPRRegisterClass) && (DestRC == ARM::DPR_VFP2RegisterClass))) {
|
(SrcRC == ARM::DPR_VFP2RegisterClass)) ||
|
||||||
|
((SrcRC == ARM::DPRRegisterClass) &&
|
||||||
|
(DestRC == ARM::DPR_VFP2RegisterClass))) {
|
||||||
// Allow copy between DPR and DPR_VFP2.
|
// Allow copy between DPR and DPR_VFP2.
|
||||||
} else {
|
} else {
|
||||||
return false;
|
return false;
|
||||||
@ -648,7 +650,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
|
|||||||
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STR))
|
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STR))
|
||||||
.addReg(SrcReg, getKillRegState(isKill))
|
.addReg(SrcReg, getKillRegState(isKill))
|
||||||
.addFrameIndex(FI).addReg(0).addImm(0));
|
.addFrameIndex(FI).addReg(0).addImm(0));
|
||||||
} else if (RC == ARM::DPRRegisterClass) {
|
} else if (RC == ARM::DPRRegisterClass || RC == ARM::DPR_VFP2RegisterClass) {
|
||||||
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FSTD))
|
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FSTD))
|
||||||
.addReg(SrcReg, getKillRegState(isKill))
|
.addReg(SrcReg, getKillRegState(isKill))
|
||||||
.addFrameIndex(FI).addImm(0));
|
.addFrameIndex(FI).addImm(0));
|
||||||
@ -670,7 +672,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
|
|||||||
if (RC == ARM::GPRRegisterClass) {
|
if (RC == ARM::GPRRegisterClass) {
|
||||||
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDR), DestReg)
|
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDR), DestReg)
|
||||||
.addFrameIndex(FI).addReg(0).addImm(0));
|
.addFrameIndex(FI).addReg(0).addImm(0));
|
||||||
} else if (RC == ARM::DPRRegisterClass) {
|
} else if (RC == ARM::DPRRegisterClass || RC == ARM::DPR_VFP2RegisterClass) {
|
||||||
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FLDD), DestReg)
|
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FLDD), DestReg)
|
||||||
.addFrameIndex(FI).addImm(0));
|
.addFrameIndex(FI).addImm(0));
|
||||||
} else {
|
} else {
|
||||||
|
@ -334,13 +334,18 @@ class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
|
|||||||
[(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src))))]>;
|
[(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src))))]>;
|
||||||
|
|
||||||
// Basic 2-register operations, scalar single-precision
|
// Basic 2-register operations, scalar single-precision
|
||||||
class N2VDInts<SDNode OpNode, NeonI Inst>
|
class N2VDInts<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
|
||||||
|
bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
|
||||||
|
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
|
||||||
|
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
|
||||||
|
(outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), NoItinerary,
|
||||||
|
!strconcat(OpcodeStr, "\t$dst, $src"), "", []>;
|
||||||
|
|
||||||
|
class N2VDIntsPat<SDNode OpNode, NeonI Inst>
|
||||||
: NEONFPPat<(f32 (OpNode SPR:$a)),
|
: NEONFPPat<(f32 (OpNode SPR:$a)),
|
||||||
(EXTRACT_SUBREG (COPY_TO_REGCLASS
|
(EXTRACT_SUBREG
|
||||||
(Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
|
(Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$a, arm_ssubreg_0)),
|
||||||
SPR:$a, arm_ssubreg_0)),
|
arm_ssubreg_0)>;
|
||||||
DPR_VFP2),
|
|
||||||
arm_ssubreg_0)>;
|
|
||||||
|
|
||||||
// Narrow 2-register intrinsics.
|
// Narrow 2-register intrinsics.
|
||||||
class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
|
class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
|
||||||
@ -380,15 +385,20 @@ class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Basic 3-register operations, scalar single-precision
|
// Basic 3-register operations, scalar single-precision
|
||||||
class N3VDs<SDNode OpNode, NeonI Inst>
|
class N3VDs<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
|
||||||
|
string OpcodeStr, ValueType ResTy, ValueType OpTy,
|
||||||
|
SDNode OpNode, bit Commutable>
|
||||||
|
: N3V<op24, op23, op21_20, op11_8, 0, op4,
|
||||||
|
(outs DPR_VFP2:$dst), (ins DPR_VFP2:$src1, DPR_VFP2:$src2), NoItinerary,
|
||||||
|
!strconcat(OpcodeStr, "\t$dst, $src1, $src2"), "", []> {
|
||||||
|
let isCommutable = Commutable;
|
||||||
|
}
|
||||||
|
class N3VDsPat<SDNode OpNode, NeonI Inst>
|
||||||
: NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)),
|
: NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)),
|
||||||
(EXTRACT_SUBREG (COPY_TO_REGCLASS
|
(EXTRACT_SUBREG
|
||||||
(Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
|
(Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$a, arm_ssubreg_0),
|
||||||
SPR:$a, arm_ssubreg_0),
|
(INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$b, arm_ssubreg_0)),
|
||||||
(INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
|
arm_ssubreg_0)>;
|
||||||
SPR:$b, arm_ssubreg_0)),
|
|
||||||
DPR_VFP2),
|
|
||||||
arm_ssubreg_0)>;
|
|
||||||
|
|
||||||
// Basic 3-register intrinsics, both double- and quad-register.
|
// Basic 3-register intrinsics, both double- and quad-register.
|
||||||
class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
|
class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
|
||||||
@ -427,18 +437,20 @@ class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
|
|||||||
(Ty (MulOp QPR:$src2, QPR:$src3)))))]>;
|
(Ty (MulOp QPR:$src2, QPR:$src3)))))]>;
|
||||||
|
|
||||||
// Multiply-Add/Sub operations, scalar single-precision
|
// Multiply-Add/Sub operations, scalar single-precision
|
||||||
class N3VDMulOps<SDNode MulNode, SDNode OpNode, NeonI Inst>
|
class N3VDMulOps<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
|
||||||
: NEONFPPat<(f32 (OpNode SPR:$acc,
|
string OpcodeStr, ValueType Ty, SDNode MulOp, SDNode OpNode>
|
||||||
(f32 (MulNode SPR:$a, SPR:$b)))),
|
: N3V<op24, op23, op21_20, op11_8, 0, op4,
|
||||||
(EXTRACT_SUBREG (COPY_TO_REGCLASS
|
(outs DPR_VFP2:$dst),
|
||||||
(Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
|
(ins DPR_VFP2:$src1, DPR_VFP2:$src2, DPR_VFP2:$src3), NoItinerary,
|
||||||
SPR:$acc, arm_ssubreg_0),
|
!strconcat(OpcodeStr, "\t$dst, $src2, $src3"), "$src1 = $dst", []>;
|
||||||
(INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
|
|
||||||
SPR:$a, arm_ssubreg_0),
|
class N3VDMulOpsPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
|
||||||
(INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
|
: NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))),
|
||||||
SPR:$b, arm_ssubreg_0)),
|
(EXTRACT_SUBREG
|
||||||
DPR_VFP2),
|
(Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$acc, arm_ssubreg_0),
|
||||||
arm_ssubreg_0)>;
|
(INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$a, arm_ssubreg_0),
|
||||||
|
(INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$b, arm_ssubreg_0)),
|
||||||
|
arm_ssubreg_0)>;
|
||||||
|
|
||||||
// Neon 3-argument intrinsics, both double- and quad-register.
|
// Neon 3-argument intrinsics, both double- and quad-register.
|
||||||
// The destination register is also used as the first source operand register.
|
// The destination register is also used as the first source operand register.
|
||||||
@ -1011,9 +1023,6 @@ defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn.i", int_arm_neon_vaddhn, 1>;
|
|||||||
// VRADDHN : Vector Rounding Add and Narrow Returning High Half (D = Q + Q)
|
// VRADDHN : Vector Rounding Add and Narrow Returning High Half (D = Q + Q)
|
||||||
defm VRADDHN : N3VNInt_HSD<1,1,0b0100,0, "vraddhn.i", int_arm_neon_vraddhn, 1>;
|
defm VRADDHN : N3VNInt_HSD<1,1,0b0100,0, "vraddhn.i", int_arm_neon_vraddhn, 1>;
|
||||||
|
|
||||||
// Vector Add Operations used for single-precision FP
|
|
||||||
def : N3VDs<fadd, VADDfd>;
|
|
||||||
|
|
||||||
// Vector Multiply Operations.
|
// Vector Multiply Operations.
|
||||||
|
|
||||||
// VMUL : Vector Multiply (integer, polynomial and floating-point)
|
// VMUL : Vector Multiply (integer, polynomial and floating-point)
|
||||||
@ -1036,9 +1045,6 @@ def VMULLp : N3VLInt<0, 1, 0b00, 0b1110, 0, "vmull.p8", v8i16, v8i8,
|
|||||||
// VQDMULL : Vector Saturating Doubling Multiply Long (Q = D * D)
|
// VQDMULL : Vector Saturating Doubling Multiply Long (Q = D * D)
|
||||||
defm VQDMULL : N3VLInt_HS<0,1,0b1101,0, "vqdmull.s", int_arm_neon_vqdmull, 1>;
|
defm VQDMULL : N3VLInt_HS<0,1,0b1101,0, "vqdmull.s", int_arm_neon_vqdmull, 1>;
|
||||||
|
|
||||||
// Vector Multiply Operations used for single-precision FP
|
|
||||||
def : N3VDs<fmul, VMULfd>;
|
|
||||||
|
|
||||||
// Vector Multiply-Accumulate and Multiply-Subtract Operations.
|
// Vector Multiply-Accumulate and Multiply-Subtract Operations.
|
||||||
|
|
||||||
// VMLA : Vector Multiply Accumulate (integer and floating-point)
|
// VMLA : Vector Multiply Accumulate (integer and floating-point)
|
||||||
@ -1060,10 +1066,6 @@ defm VMLSLu : N3VLInt3_QHS<1,1,0b1010,0, "vmlsl.u", int_arm_neon_vmlslu>;
|
|||||||
// VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D)
|
// VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D)
|
||||||
defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, "vqdmlsl.s", int_arm_neon_vqdmlsl>;
|
defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, "vqdmlsl.s", int_arm_neon_vqdmlsl>;
|
||||||
|
|
||||||
// Vector Multiply-Accumulate/Subtract used for single-precision FP
|
|
||||||
def : N3VDMulOps<fmul, fadd, VMLAfd>;
|
|
||||||
def : N3VDMulOps<fmul, fsub, VMLSfd>;
|
|
||||||
|
|
||||||
// Vector Subtract Operations.
|
// Vector Subtract Operations.
|
||||||
|
|
||||||
// VSUB : Vector Subtract (integer and floating-point)
|
// VSUB : Vector Subtract (integer and floating-point)
|
||||||
@ -1087,9 +1089,6 @@ defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn.i", int_arm_neon_vsubhn, 0>;
|
|||||||
// VRSUBHN : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q)
|
// VRSUBHN : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q)
|
||||||
defm VRSUBHN : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn.i", int_arm_neon_vrsubhn, 0>;
|
defm VRSUBHN : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn.i", int_arm_neon_vrsubhn, 0>;
|
||||||
|
|
||||||
// Vector Sub Operations used for single-precision FP
|
|
||||||
def : N3VDs<fsub, VSUBfd>;
|
|
||||||
|
|
||||||
// Vector Comparisons.
|
// Vector Comparisons.
|
||||||
|
|
||||||
// VCEQ : Vector Compare Equal
|
// VCEQ : Vector Compare Equal
|
||||||
@ -1453,7 +1452,6 @@ def VABSfd : N2VDInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0, "vabs.f32",
|
|||||||
v2f32, v2f32, int_arm_neon_vabsf>;
|
v2f32, v2f32, int_arm_neon_vabsf>;
|
||||||
def VABSfq : N2VQInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0, "vabs.f32",
|
def VABSfq : N2VQInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0, "vabs.f32",
|
||||||
v4f32, v4f32, int_arm_neon_vabsf>;
|
v4f32, v4f32, int_arm_neon_vabsf>;
|
||||||
def : N2VDInts<fabs, VABSfd>;
|
|
||||||
|
|
||||||
// VQABS : Vector Saturating Absolute Value
|
// VQABS : Vector Saturating Absolute Value
|
||||||
defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0, "vqabs.s",
|
defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0, "vqabs.s",
|
||||||
@ -1492,7 +1490,6 @@ def VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0,
|
|||||||
(outs QPR:$dst), (ins QPR:$src), NoItinerary,
|
(outs QPR:$dst), (ins QPR:$src), NoItinerary,
|
||||||
"vneg.f32\t$dst, $src", "",
|
"vneg.f32\t$dst, $src", "",
|
||||||
[(set QPR:$dst, (v4f32 (fneg QPR:$src)))]>;
|
[(set QPR:$dst, (v4f32 (fneg QPR:$src)))]>;
|
||||||
def : N2VDInts<fneg, VNEGf32d>;
|
|
||||||
|
|
||||||
def : Pat<(v8i8 (vneg_conv DPR:$src)), (VNEGs8d DPR:$src)>;
|
def : Pat<(v8i8 (vneg_conv DPR:$src)), (VNEGs8d DPR:$src)>;
|
||||||
def : Pat<(v4i16 (vneg_conv DPR:$src)), (VNEGs16d DPR:$src)>;
|
def : Pat<(v4i16 (vneg_conv DPR:$src)), (VNEGs16d DPR:$src)>;
|
||||||
@ -1906,6 +1903,51 @@ class VREV16Q<bits<2> op19_18, string OpcodeStr, ValueType Ty>
|
|||||||
def VREV16d8 : VREV16D<0b00, "vrev16.8", v8i8>;
|
def VREV16d8 : VREV16D<0b00, "vrev16.8", v8i8>;
|
||||||
def VREV16q8 : VREV16Q<0b00, "vrev16.8", v16i8>;
|
def VREV16q8 : VREV16Q<0b00, "vrev16.8", v16i8>;
|
||||||
|
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
// NEON instructions for single-precision FP math
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
// These need separate instructions because they must use the DPR_VFP2 register
|
||||||
|
// class, which has SPR sub-registers.
|
||||||
|
|
||||||
|
// Vector Add Operations used for single-precision FP
|
||||||
|
let neverHasSideEffects = 1 in
|
||||||
|
def VADDfd_sfp : N3VDs<0, 0, 0b00, 0b1101, 0, "vadd.f32", v2f32, v2f32, fadd,1>;
|
||||||
|
def : N3VDsPat<fadd, VADDfd_sfp>;
|
||||||
|
|
||||||
|
// Vector Multiply Operations used for single-precision FP
|
||||||
|
let neverHasSideEffects = 1 in
|
||||||
|
def VMULfd_sfp : N3VDs<1, 0, 0b00, 0b1101, 1, "vmul.f32", v2f32, v2f32, fmul,1>;
|
||||||
|
def : N3VDsPat<fmul, VMULfd_sfp>;
|
||||||
|
|
||||||
|
// Vector Multiply-Accumulate/Subtract used for single-precision FP
|
||||||
|
let neverHasSideEffects = 1 in
|
||||||
|
def VMLAfd_sfp : N3VDMulOps<0, 0, 0b00, 0b1101, 1, "vmla.f32", v2f32,fmul,fadd>;
|
||||||
|
def : N3VDMulOpsPat<fmul, fadd, VMLAfd>;
|
||||||
|
|
||||||
|
let neverHasSideEffects = 1 in
|
||||||
|
def VMLSfd_sfp : N3VDMulOps<0, 0, 0b10, 0b1101, 1, "vmls.f32", v2f32,fmul,fsub>;
|
||||||
|
def : N3VDMulOpsPat<fmul, fsub, VMLSfd>;
|
||||||
|
|
||||||
|
// Vector Sub Operations used for single-precision FP
|
||||||
|
let neverHasSideEffects = 1 in
|
||||||
|
def VSUBfd_sfp : N3VDs<0, 0, 0b10, 0b1101, 0, "vsub.f32", v2f32, v2f32, fsub,0>;
|
||||||
|
def : N3VDsPat<fsub, VSUBfd_sfp>;
|
||||||
|
|
||||||
|
// Vector Absolute for single-precision FP
|
||||||
|
let neverHasSideEffects = 1 in
|
||||||
|
def VABSfd_sfp : N2VDInts<0b11, 0b11, 0b10, 0b01, 0b01110, 0, "vabs.f32",
|
||||||
|
v2f32, v2f32, int_arm_neon_vabsf>;
|
||||||
|
def : N2VDIntsPat<fabs, VABSfd_sfp>;
|
||||||
|
|
||||||
|
// Vector Negate for single-precision FP
|
||||||
|
|
||||||
|
let neverHasSideEffects = 1 in
|
||||||
|
def VNEGf32d_sfp : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
|
||||||
|
(outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), NoItinerary,
|
||||||
|
"vneg.f32\t$dst, $src", "", []>;
|
||||||
|
def : N2VDIntsPat<fneg, VNEGf32d_sfp>;
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// Non-Instruction Patterns
|
// Non-Instruction Patterns
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
80
test/CodeGen/Thumb2/2009-08-07-NeonFPBug.ll
Normal file
80
test/CodeGen/Thumb2/2009-08-07-NeonFPBug.ll
Normal file
@ -0,0 +1,80 @@
|
|||||||
|
; RUN: llvm-as < %s | llc -mtriple=thumbv7-apple-darwin10 -mcpu=cortex-a8 -mattr=+neonfp
|
||||||
|
|
||||||
|
%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
|
||||||
|
%struct.JHUFF_TBL = type { [17 x i8], [256 x i8], i32 }
|
||||||
|
%struct.JQUANT_TBL = type { [64 x i16], i32 }
|
||||||
|
%struct.__sFILEX = type opaque
|
||||||
|
%struct.__sbuf = type { i8*, i32 }
|
||||||
|
%struct.anon = type { [8 x i32], [48 x i8] }
|
||||||
|
%struct.backing_store_info = type { void (%struct.jpeg_common_struct*, %struct.backing_store_info*, i8*, i32, i32)*, void (%struct.jpeg_common_struct*, %struct.backing_store_info*, i8*, i32, i32)*, void (%struct.jpeg_common_struct*, %struct.backing_store_info*)*, %struct.FILE*, [64 x i8] }
|
||||||
|
%struct.jpeg_color_deconverter = type { void (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*, i8***, i32, i8**, i32)* }
|
||||||
|
%struct.jpeg_color_quantizer = type { void (%struct.jpeg_decompress_struct*, i32)*, void (%struct.jpeg_decompress_struct*, i8**, i8**, i32)*, void (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*)* }
|
||||||
|
%struct.jpeg_common_struct = type { %struct.jpeg_error_mgr*, %struct.jpeg_memory_mgr*, %struct.jpeg_progress_mgr*, i32, i32 }
|
||||||
|
%struct.jpeg_component_info = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.JQUANT_TBL*, i8* }
|
||||||
|
%struct.jpeg_d_coef_controller = type { void (%struct.jpeg_decompress_struct*)*, i32 (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*)*, i32 (%struct.jpeg_decompress_struct*, i8***)*, %struct.jvirt_barray_control** }
|
||||||
|
%struct.jpeg_d_main_controller = type { void (%struct.jpeg_decompress_struct*, i32)*, void (%struct.jpeg_decompress_struct*, i8**, i32*, i32)* }
|
||||||
|
%struct.jpeg_d_post_controller = type { void (%struct.jpeg_decompress_struct*, i32)*, void (%struct.jpeg_decompress_struct*, i8***, i32*, i32, i8**, i32*, i32)* }
|
||||||
|
%struct.jpeg_decomp_master = type { void (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*)*, i32 }
|
||||||
|
%struct.jpeg_decompress_struct = type { %struct.jpeg_error_mgr*, %struct.jpeg_memory_mgr*, %struct.jpeg_progress_mgr*, i32, i32, %struct.jpeg_source_mgr*, i32, i32, i32, i32, i32, i32, i32, double, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8**, i32, i32, i32, i32, i32, [64 x i32]*, [4 x %struct.JQUANT_TBL*], [4 x %struct.JHUFF_TBL*], [4 x %struct.JHUFF_TBL*], i32, %struct.jpeg_component_info*, i32, i32, [16 x i8], [16 x i8], [16 x i8], i32, i32, i8, i16, i16, i32, i8, i32, i32, i32, i32, i32, i8*, i32, [4 x %struct.jpeg_component_info*], i32, i32, i32, [10 x i32], i32, i32, i32, i32, i32, %struct.jpeg_decomp_master*, %struct.jpeg_d_main_controller*, %struct.jpeg_d_coef_controller*, %struct.jpeg_d_post_controller*, %struct.jpeg_input_controller*, %struct.jpeg_marker_reader*, %struct.jpeg_entropy_decoder*, %struct.jpeg_inverse_dct*, %struct.jpeg_upsampler*, %struct.jpeg_color_deconverter*, %struct.jpeg_color_quantizer* }
|
||||||
|
%struct.jpeg_entropy_decoder = type { void (%struct.jpeg_decompress_struct*)*, i32 (%struct.jpeg_decompress_struct*, [64 x i16]**)* }
|
||||||
|
%struct.jpeg_error_mgr = type { void (%struct.jpeg_common_struct*)*, void (%struct.jpeg_common_struct*, i32)*, void (%struct.jpeg_common_struct*)*, void (%struct.jpeg_common_struct*, i8*)*, void (%struct.jpeg_common_struct*)*, i32, %struct.anon, i32, i32, i8**, i32, i8**, i32, i32 }
|
||||||
|
%struct.jpeg_input_controller = type { i32 (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*)*, i32, i32 }
|
||||||
|
%struct.jpeg_inverse_dct = type { void (%struct.jpeg_decompress_struct*)*, [10 x void (%struct.jpeg_decompress_struct*, %struct.jpeg_component_info*, i16*, i8**, i32)*] }
|
||||||
|
%struct.jpeg_marker_reader = type { void (%struct.jpeg_decompress_struct*)*, i32 (%struct.jpeg_decompress_struct*)*, i32 (%struct.jpeg_decompress_struct*)*, i32 (%struct.jpeg_decompress_struct*)*, [16 x i32 (%struct.jpeg_decompress_struct*)*], i32, i32, i32, i32 }
|
||||||
|
%struct.jpeg_memory_mgr = type { i8* (%struct.jpeg_common_struct*, i32, i32)*, i8* (%struct.jpeg_common_struct*, i32, i32)*, i8** (%struct.jpeg_common_struct*, i32, i32, i32)*, [64 x i16]** (%struct.jpeg_common_struct*, i32, i32, i32)*, %struct.jvirt_sarray_control* (%struct.jpeg_common_struct*, i32, i32, i32, i32, i32)*, %struct.jvirt_barray_control* (%struct.jpeg_common_struct*, i32, i32, i32, i32, i32)*, void (%struct.jpeg_common_struct*)*, i8** (%struct.jpeg_common_struct*, %struct.jvirt_sarray_control*, i32, i32, i32)*, [64 x i16]** (%struct.jpeg_common_struct*, %struct.jvirt_barray_control*, i32, i32, i32)*, void (%struct.jpeg_common_struct*, i32)*, void (%struct.jpeg_common_struct*)*, i32 }
|
||||||
|
%struct.jpeg_progress_mgr = type { void (%struct.jpeg_common_struct*)*, i32, i32, i32, i32 }
|
||||||
|
%struct.jpeg_source_mgr = type { i8*, i32, void (%struct.jpeg_decompress_struct*)*, i32 (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*, i32)*, i32 (%struct.jpeg_decompress_struct*, i32)*, void (%struct.jpeg_decompress_struct*)* }
|
||||||
|
%struct.jpeg_upsampler = type { void (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*, i8***, i32*, i32, i8**, i32*, i32)*, i32 }
|
||||||
|
%struct.jvirt_barray_control = type { [64 x i16]**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.jvirt_barray_control*, %struct.backing_store_info }
|
||||||
|
%struct.jvirt_sarray_control = type { i8**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.jvirt_sarray_control*, %struct.backing_store_info }
|
||||||
|
|
||||||
|
define arm_apcscc void @jpeg_idct_float(%struct.jpeg_decompress_struct* nocapture %cinfo, %struct.jpeg_component_info* nocapture %compptr, i16* nocapture %coef_block, i8** nocapture %output_buf, i32 %output_col) nounwind {
|
||||||
|
entry:
|
||||||
|
br label %bb
|
||||||
|
|
||||||
|
bb: ; preds = %bb, %entry
|
||||||
|
%0 = load float* undef, align 4 ; <float> [#uses=1]
|
||||||
|
%1 = fmul float undef, %0 ; <float> [#uses=2]
|
||||||
|
%tmp73 = add i32 0, 224 ; <i32> [#uses=1]
|
||||||
|
%scevgep74 = getelementptr i8* null, i32 %tmp73 ; <i8*> [#uses=1]
|
||||||
|
%scevgep7475 = bitcast i8* %scevgep74 to float* ; <float*> [#uses=1]
|
||||||
|
%2 = load float* null, align 4 ; <float> [#uses=1]
|
||||||
|
%3 = fmul float 0.000000e+00, %2 ; <float> [#uses=2]
|
||||||
|
%4 = fadd float %1, %3 ; <float> [#uses=1]
|
||||||
|
%5 = fsub float %1, %3 ; <float> [#uses=2]
|
||||||
|
%6 = fadd float undef, 0.000000e+00 ; <float> [#uses=2]
|
||||||
|
%7 = fmul float undef, 0x3FF6A09E60000000 ; <float> [#uses=1]
|
||||||
|
%8 = fsub float %7, %6 ; <float> [#uses=2]
|
||||||
|
%9 = fsub float %4, %6 ; <float> [#uses=1]
|
||||||
|
%10 = fadd float %5, %8 ; <float> [#uses=2]
|
||||||
|
%11 = fsub float %5, %8 ; <float> [#uses=1]
|
||||||
|
%12 = sitofp i16 undef to float ; <float> [#uses=1]
|
||||||
|
%13 = fmul float %12, 0.000000e+00 ; <float> [#uses=2]
|
||||||
|
%14 = sitofp i16 undef to float ; <float> [#uses=1]
|
||||||
|
%15 = load float* %scevgep7475, align 4 ; <float> [#uses=1]
|
||||||
|
%16 = fmul float %14, %15 ; <float> [#uses=2]
|
||||||
|
%17 = fadd float undef, undef ; <float> [#uses=2]
|
||||||
|
%18 = fadd float %13, %16 ; <float> [#uses=2]
|
||||||
|
%19 = fsub float %13, %16 ; <float> [#uses=1]
|
||||||
|
%20 = fadd float %18, %17 ; <float> [#uses=2]
|
||||||
|
%21 = fsub float %18, %17 ; <float> [#uses=1]
|
||||||
|
%22 = fmul float %21, 0x3FF6A09E60000000 ; <float> [#uses=1]
|
||||||
|
%23 = fmul float undef, 0x3FFD906BC0000000 ; <float> [#uses=2]
|
||||||
|
%24 = fmul float %19, 0x3FF1517A80000000 ; <float> [#uses=1]
|
||||||
|
%25 = fsub float %24, %23 ; <float> [#uses=1]
|
||||||
|
%26 = fadd float undef, %23 ; <float> [#uses=1]
|
||||||
|
%27 = fsub float %26, %20 ; <float> [#uses=3]
|
||||||
|
%28 = fsub float %22, %27 ; <float> [#uses=2]
|
||||||
|
%29 = fadd float %25, %28 ; <float> [#uses=1]
|
||||||
|
%30 = fadd float undef, %20 ; <float> [#uses=1]
|
||||||
|
store float %30, float* undef, align 4
|
||||||
|
%31 = fadd float %10, %27 ; <float> [#uses=1]
|
||||||
|
store float %31, float* undef, align 4
|
||||||
|
%32 = fsub float %10, %27 ; <float> [#uses=1]
|
||||||
|
store float %32, float* undef, align 4
|
||||||
|
%33 = fadd float %11, %28 ; <float> [#uses=1]
|
||||||
|
store float %33, float* undef, align 4
|
||||||
|
%34 = fsub float %9, %29 ; <float> [#uses=1]
|
||||||
|
store float %34, float* undef, align 4
|
||||||
|
br label %bb
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user