mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 19:23:23 +01:00
94d1ca9e9c
Summary: Previously the assembler parsed all literals as either 32-bit integers or 32-bit floating-point values. Because of this we couldn't support f64 literals. E.g. in instruction "v_fract_f64 v[0:1], 0.5", literal 0.5 was encoded as 32-bit literal 0x3f000000, which is incorrect and will be interpreted as 3.0517578125E-5 instead of 0.5. Correct encoding is inline constant 240 (optimal) or 32-bit literal 0x3FE00000 at least. With this change the way immediate literals are parsed is changed. All literals are always parsed as 64-bit values either integer or floating-point. Then we convert parsed literals to correct form based on information about type of operand parsed (was literal floating or binary) and type of expected instruction operands (is this f32/64 or b32/64 instruction). Here are rules how we convert literals: - We parsed fp literal: - Instruction expects 64-bit operand: - If parsed literal is inlinable (e.g. v_fract_f64_e32 v[0:1], 0.5) - then we do nothing with this literal - Else if literal is not-inlinable but instruction requires to inline it (e.g. this is e64 encoding, v_fract_f64_e64 v[0:1], 1.5) - report error - Else literal is not-inlinable but we can encode it as additional 32-bit literal constant - If instruction expects fp operand type (f64) - Check if low 32 bits of literal are zeroes (e.g. v_fract_f64 v[0:1], 1.5) - If so then do nothing - Else (e.g. v_fract_f64 v[0:1], 3.1415) - report warning that low 32 bits will be set to zeroes and precision will be lost - set low 32 bits of literal to zeroes - Instruction expects integer operand type (e.g. s_mov_b64_e32 s[0:1], 1.5) - report error as it is unclear how to encode this literal - Instruction expects 32-bit operand: - Convert parsed 64 bit fp literal to 32 bit fp. Allow loss of precision but not overflow or underflow - Is this literal inlinable and are we required to inline literal (e.g. v_trunc_f32_e64 v0, 0.5) - do nothing - Else report error - Do nothing. 
We can encode any other 32-bit fp literal (e.g. v_trunc_f32 v0, 10000000.0) - Parsed binary literal: - Is this literal inlinable (e.g. v_trunc_f32_e32 v0, 35) - do nothing - Else, are we required to inline this literal (e.g. v_trunc_f32_e64 v0, 35) - report error - Else, literal is not-inlinable and we are not required to inline it - Are high 32 bits of literal zeroes or same as sign bit (32 bit) - do nothing (e.g. v_trunc_f32 v0, 0xdeadbeef) - Else - report error (e.g. v_trunc_f32 v0, 0x123456789abcdef0) For this change it is required that we know operand types of instruction (are they f32/64 or b32/64). I added several new register operands (they extend previous register operands) and set operand types to corresponding types: ''' enum OperandType { OPERAND_REG_IMM32_INT, OPERAND_REG_IMM32_FP, OPERAND_REG_INLINE_C_INT, OPERAND_REG_INLINE_C_FP, } ''' This is not working yet: - Several tests are failing - Problems with predicate methods for inline immediates - LLVM generated assembler parts try to select e64 encoding before e32. More changes are required for several AsmOperands. Reviewers: vpykhtin, tstellarAMD Subscribers: arsenm, kzhuravl, artem.tamazov Differential Revision: https://reviews.llvm.org/D22922 llvm-svn: 281050
113 lines
6.1 KiB
ArmAsm
113 lines
6.1 KiB
ArmAsm
// Test driver lines. The same input file is assembled by llvm-mc four times
// (default amdgcn CPU, SI twice, and fiji) with -show-encoding, and each
// output is piped to FileCheck with a different prefix set: GCN plus SICI,
// NOSICI alone, or GCN plus VI.
// The first three invocations are prefixed with `not`; the last two redirect
// stderr into the pipe with 2>&1.
// NOTE(review): `not` presumably means llvm-mc is expected to fail on at least
// one input line for those targets - confirm against the lit documentation.
// RUN: not llvm-mc -arch=amdgcn -show-encoding %s | FileCheck --check-prefix=GCN --check-prefix=SICI %s
|
|
// RUN: not llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s | FileCheck --check-prefix=GCN --check-prefix=SICI %s
|
|
// RUN: not llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s 2>&1 | FileCheck --check-prefix=NOSICI %s
|
|
// RUN: llvm-mc -arch=amdgcn -mcpu=fiji -show-encoding %s 2>&1 | FileCheck --check-prefix=GCN --check-prefix=VI %s
|
|
|
|
// s_mov_b32 / s_mov_b64 cases: operands are written as bracketed register
// lists (e.g. [ttmp4,ttmp5] or [exec_lo,exec_hi]). The expected-output lines
// show each list printed back as a single register, a register range, or a
// named register pair, together with the expected encoding bytes for the
// SICI and VI prefix sets.

// A one-element list is expected to print as the plain register.
s_mov_b32 [ttmp5], [ttmp3]
|
|
// SICI: s_mov_b32 ttmp5, ttmp3 ; encoding: [0x73,0x03,0xf5,0xbe]
|
|
// VI: s_mov_b32 ttmp5, ttmp3 ; encoding: [0x73,0x00,0xf5,0xbe]
|
|
|
|
// A two-element list is expected to print as the range ttmp[4:5].
s_mov_b64 [ttmp4,ttmp5], [ttmp2,ttmp3]
|
|
// SICI: s_mov_b64 ttmp[4:5], ttmp[2:3] ; encoding: [0x72,0x04,0xf4,0xbe]
|
|
// VI: s_mov_b64 ttmp[4:5], ttmp[2:3] ; encoding: [0x72,0x01,0xf4,0xbe]
|
|
|
|
// Same instruction written with range syntax; the expectations are identical
// to the list form above.
s_mov_b64 ttmp[4:5], ttmp[2:3]
|
|
// SICI: s_mov_b64 ttmp[4:5], ttmp[2:3] ; encoding: [0x72,0x04,0xf4,0xbe]
|
|
// VI: s_mov_b64 ttmp[4:5], ttmp[2:3] ; encoding: [0x72,0x01,0xf4,0xbe]
|
|
|
|
// List form on the destination only.
s_mov_b64 [s6,s7], s[8:9]
|
|
// SICI: s_mov_b64 s[6:7], s[8:9] ; encoding: [0x08,0x04,0x86,0xbe]
|
|
// VI: s_mov_b64 s[6:7], s[8:9] ; encoding: [0x08,0x01,0x86,0xbe]
|
|
|
|
// List form on the source only; same expectations as the previous case.
s_mov_b64 s[6:7], [s8,s9]
|
|
// SICI: s_mov_b64 s[6:7], s[8:9] ; encoding: [0x08,0x04,0x86,0xbe]
|
|
// VI: s_mov_b64 s[6:7], s[8:9] ; encoding: [0x08,0x01,0x86,0xbe]
|
|
|
|
// A _lo/_hi pair in a list is expected to print as the combined name (exec).
s_mov_b64 [exec_lo,exec_hi], s[2:3]
|
|
// SICI: s_mov_b64 exec, s[2:3] ; encoding: [0x02,0x04,0xfe,0xbe]
|
|
// VI: s_mov_b64 exec, s[2:3] ; encoding: [0x02,0x01,0xfe,0xbe]
|
|
|
|
// The flat_scratch pair: the NOSICI run expects an error diagnostic here,
// while the VI run expects it to assemble and print as flat_scratch.
s_mov_b64 [flat_scratch_lo,flat_scratch_hi], s[2:3]
|
|
// NOSICI: error:
|
|
// VI: s_mov_b64 flat_scratch, s[2:3] ; encoding: [0x02,0x01,0xe6,0xbe]
|
|
|
|
s_mov_b64 [vcc_lo,vcc_hi], s[2:3]
|
|
// SICI: s_mov_b64 vcc, s[2:3] ; encoding: [0x02,0x04,0xea,0xbe]
|
|
// VI: s_mov_b64 vcc, s[2:3] ; encoding: [0x02,0x01,0xea,0xbe]
|
|
|
|
s_mov_b64 [tba_lo,tba_hi], s[2:3]
|
|
// SICI: s_mov_b64 tba, s[2:3] ; encoding: [0x02,0x04,0xec,0xbe]
|
|
// VI: s_mov_b64 tba, s[2:3] ; encoding: [0x02,0x01,0xec,0xbe]
|
|
|
|
s_mov_b64 [tma_lo,tma_hi], s[2:3]
|
|
// SICI: s_mov_b64 tma, s[2:3] ; encoding: [0x02,0x04,0xee,0xbe]
|
|
// VI: s_mov_b64 tma, s[2:3] ; encoding: [0x02,0x01,0xee,0xbe]
|
|
|
|
// Vector-register cases: bracketed lists of v registers. The first case has a
// single expectation under the shared GCN prefix; the second has separate
// SICI and VI expectations whose encoding bytes differ.
v_mov_b32_e32 [v1], [v2]
|
|
// GCN: v_mov_b32_e32 v1, v2 ; encoding: [0x02,0x03,0x02,0x7e]
|
|
|
|
// Written without an encoding suffix; the expected output prints the _e32
// form with the lists shown as ranges v[1:2] and v[2:3].
v_rcp_f64 [v1,v2], [v2,v3]
|
|
// SICI: v_rcp_f64_e32 v[1:2], v[2:3] ; encoding: [0x02,0x5f,0x02,0x7e]
|
|
// VI: v_rcp_f64_e32 v[1:2], v[2:3] ; encoding: [0x02,0x4b,0x02,0x7e]
|
|
|
|
// buffer_* cases: four-element register lists used as operands of buffer
// load/store instructions. Each list is expected to print as a four-register
// range (v[1:4], s[4:7] or ttmp[4:7]).
buffer_load_dwordx4 [v1,v2,v3,v4], off, [s4,s5,s6,s7], s1
|
|
// SICI: buffer_load_dwordx4 v[1:4], off, s[4:7], s1 ; encoding: [0x00,0x00,0x38,0xe0,0x00,0x01,0x01,0x01]
|
|
// VI: buffer_load_dwordx4 v[1:4], off, s[4:7], s1 ; encoding: [0x00,0x00,0x5c,0xe0,0x00,0x01,0x01,0x01]
|
|
|
|
// The remaining cases use a ttmp list in the third operand position.
buffer_load_dword v1, off, [ttmp4,ttmp5,ttmp6,ttmp7], s1
|
|
// SICI: buffer_load_dword v1, off, ttmp[4:7], s1 ; encoding: [0x00,0x00,0x30,0xe0,0x00,0x01,0x1d,0x01]
|
|
// VI: buffer_load_dword v1, off, ttmp[4:7], s1 ; encoding: [0x00,0x00,0x50,0xe0,0x00,0x01,0x1d,0x01]
|
|
|
|
// For this instruction the SICI and VI expectations list the same bytes.
buffer_store_format_xyzw v[1:4], off, [ttmp4,ttmp5,ttmp6,ttmp7], ttmp1
|
|
// SICI: buffer_store_format_xyzw v[1:4], off, ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x1c,0xe0,0x00,0x01,0x1d,0x71]
|
|
// VI: buffer_store_format_xyzw v[1:4], off, ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x1c,0xe0,0x00,0x01,0x1d,0x71]
|
|
|
|
buffer_load_ubyte v1, off, [ttmp4,ttmp5,ttmp6,ttmp7], ttmp1
|
|
// SICI: buffer_load_ubyte v1, off, ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x20,0xe0,0x00,0x01,0x1d,0x71]
|
|
// VI: buffer_load_ubyte v1, off, ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x40,0xe0,0x00,0x01,0x1d,0x71]
|
|
|
|
buffer_store_dwordx4 v[1:4], off, [ttmp4,ttmp5,ttmp6,ttmp7], ttmp1
|
|
// SICI: buffer_store_dwordx4 v[1:4], off, ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x78,0xe0,0x00,0x01,0x1d,0x71]
|
|
// VI: buffer_store_dwordx4 v[1:4], off, ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x7c,0xe0,0x00,0x01,0x1d,0x71]
|
|
|
|
// s_load_dwordx4 / s_buffer_load_* cases: ttmp register lists as destination
// and base operands. The SICI expectations list four encoding bytes and the
// VI expectations list eight.
s_load_dwordx4 [ttmp4,ttmp5,ttmp6,ttmp7], [ttmp2,ttmp3], ttmp4
|
|
// SICI: s_load_dwordx4 ttmp[4:7], ttmp[2:3], ttmp4 ; encoding: [0x74,0x72,0xba,0xc0]
|
|
// VI: s_load_dwordx4 ttmp[4:7], ttmp[2:3], ttmp4 ; encoding: [0x39,0x1d,0x08,0xc0,0x74,0x00,0x00,0x00]
|
|
|
|
s_buffer_load_dword ttmp1, [ttmp4,ttmp5,ttmp6,ttmp7], ttmp4
|
|
// SICI: s_buffer_load_dword ttmp1, ttmp[4:7], ttmp4 ; encoding: [0x74,0xf4,0x38,0xc2]
|
|
// VI: s_buffer_load_dword ttmp1, ttmp[4:7], ttmp4 ; encoding: [0x7a,0x1c,0x20,0xc0,0x74,0x00,0x00,0x00]
|
|
|
|
// Baseline for the next two cases: plain register names in both lists.
s_buffer_load_dwordx4 [ttmp8,ttmp9,ttmp10,ttmp11], [ttmp4,ttmp5,ttmp6,ttmp7], ttmp4
|
|
// SICI: s_buffer_load_dwordx4 ttmp[8:11], ttmp[4:7], ttmp4 ; encoding: [0x74,0x74,0xbc,0xc2]
|
|
// VI: s_buffer_load_dwordx4 ttmp[8:11], ttmp[4:7], ttmp4 ; encoding: [0x3a,0x1e,0x28,0xc0,0x74,0x00,0x00,0x00]
|
|
|
|
// Register indices given as integer expressions. The expectations match the
// baseline above, so 5*2 is taken as 10, (3+2)*2+1 as 11, 45/11 as 4 and
// (33+45)/11 as 7 (division results are whole numbers).
s_buffer_load_dwordx4 [ttmp[8],ttmp[8+1],ttmp[5*2],ttmp[(3+2)*2+1]], ttmp[45/11:(33+45)/11], ttmp4
|
|
// SICI: s_buffer_load_dwordx4 ttmp[8:11], ttmp[4:7], ttmp4 ; encoding: [0x74,0x74,0xbc,0xc2]
|
|
// VI: s_buffer_load_dwordx4 ttmp[8:11], ttmp[4:7], ttmp4 ; encoding: [0x3a,0x1e,0x28,0xc0,0x74,0x00,0x00,0x00]
|
|
|
|
// Same operands with the expression forms swapped: an expression range as
// destination and a list mixing ttmp[expr], ttmp[N] and ttmpN as base.
s_buffer_load_dwordx4 ttmp[7+1:(3+2)*2+1], [ttmp[45/11],ttmp[5],ttmp6,ttmp[(33+45)/11]], ttmp4
|
|
// SICI: s_buffer_load_dwordx4 ttmp[8:11], ttmp[4:7], ttmp4 ; encoding: [0x74,0x74,0xbc,0xc2]
|
|
// VI: s_buffer_load_dwordx4 ttmp[8:11], ttmp[4:7], ttmp4 ; encoding: [0x3a,0x1e,0x28,0xc0,0x74,0x00,0x00,0x00]
|
|
|
|
// flat_load_* cases: vector register ranges and lists whose indices are
// integer expressions. Only VI expectations are given for these cases.
// Every flat_load_dword case expects the same output (v8, v[2:3]) and every
// flat_load_dwordx4 case expects the same output (v[8:11], v[2:3]).

// A range whose start equals its end is expected to print as a single register.
flat_load_dword v[8:8], v[2:3]
|
|
// VI: flat_load_dword v8, v[2:3] ; encoding: [0x00,0x00,0x50,0xdc,0x02,0x00,0x00,0x08]
|
|
|
|
// 63/8+1 and 65/8 are both expected to evaluate to 8.
flat_load_dword v[63/8+1:65/8], v[2:3]
|
|
// VI: flat_load_dword v8, v[2:3] ; encoding: [0x00,0x00,0x50,0xdc,0x02,0x00,0x00,0x08]
|
|
|
|
// 2*2-2 is expected to evaluate to 2 and (3+7)/3 to 3.
flat_load_dword v8, v[2*2-2:(3+7)/3]
|
|
// VI: flat_load_dword v8, v[2:3] ; encoding: [0x00,0x00,0x50,0xdc,0x02,0x00,0x00,0x08]
|
|
|
|
// Single-index bracket form with an expression.
flat_load_dword v[63/8+1], v[2:3]
|
|
// VI: flat_load_dword v8, v[2:3] ; encoding: [0x00,0x00,0x50,0xdc,0x02,0x00,0x00,0x08]
|
|
|
|
// Parenthesised expression as the range end: (3*3-6) is expected to give 3.
flat_load_dwordx4 v[8:11], v[2*2-2:(3*3-6)]
|
|
// VI: flat_load_dwordx4 v[8:11], v[2:3] ; encoding: [0x00,0x00,0x5c,0xdc,0x02,0x00,0x00,0x08]
|
|
|
|
// 8/2+4 is expected to evaluate to 8 and 11/2+6 to 11.
flat_load_dwordx4 v[8/2+4:11/2+6], v[2:3]
|
|
// VI: flat_load_dwordx4 v[8:11], v[2:3] ; encoding: [0x00,0x00,0x5c,0xdc,0x02,0x00,0x00,0x08]
|
|
|
|
// A list mixing v[expr], vN and v[N] element forms.
flat_load_dwordx4 [v[8/2+4],v9,v[10],v[11/2+6]], v[2:3]
|
|
// VI: flat_load_dwordx4 v[8:11], v[2:3] ; encoding: [0x00,0x00,0x5c,0xdc,0x02,0x00,0x00,0x08]
|