llvm-mirror/test/CodeGen/AMDGPU/split-scalar-i64-add.ll

; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s

declare i32 @llvm.amdgcn.workitem.id.x() readnone

; This is broken because the low half of the 64-bit add remains on the
; SALU, but the upper half does not. The addc expects the carry bit
; set in vcc, which is undefined since the low scalar half add sets
; scc instead.

; FIXME: SIShrinkInstructions should force immediate fold.

; FUNC-LABEL: {{^}}imp_def_vcc_split_i64_add_0:
; SI: s_movk_i32 [[K:s[0-9]+]], 0x18f
; SI: v_add_i32_e32 v{{[0-9]+}}, vcc, [[K]], v{{[0-9]+}}
; SI: v_addc_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc
define amdgpu_kernel void @imp_def_vcc_split_i64_add_0(i64 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %s.val) {
  %v.val = load volatile i32, i32 addrspace(1)* %in
  %vec.0 = insertelement <2 x i32> undef, i32 %s.val, i32 0
  %vec.1 = insertelement <2 x i32> %vec.0, i32 %v.val, i32 1
  %bc = bitcast <2 x i32> %vec.1 to i64
  %add = add i64 %bc, 399
  store i64 %add, i64 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}s_imp_def_vcc_split_i64_add_0:
; SI: s_add_u32 {{s[0-9]+}}, {{s[0-9]+}}, 0x18f
; SI: s_addc_u32 {{s[0-9]+}}, 0xf423f, 0
define amdgpu_kernel void @s_imp_def_vcc_split_i64_add_0(i64 addrspace(1)* %out, i32 %val) {
  %vec.0 = insertelement <2 x i32> undef, i32 %val, i32 0
  %vec.1 = insertelement <2 x i32> %vec.0, i32 999999, i32 1
  %bc = bitcast <2 x i32> %vec.1 to i64
  %add = add i64 %bc, 399
  store i64 %add, i64 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}imp_def_vcc_split_i64_add_1:
; SI: v_add_i32
; SI: v_addc_u32
define amdgpu_kernel void @imp_def_vcc_split_i64_add_1(i64 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %val0, i64 %val1) {
  %v.val = load volatile i32, i32 addrspace(1)* %in
  %vec.0 = insertelement <2 x i32> undef, i32 %val0, i32 0
  %vec.1 = insertelement <2 x i32> %vec.0, i32 %v.val, i32 1
  %bc = bitcast <2 x i32> %vec.1 to i64
  %add = add i64 %bc, %val1
  store i64 %add, i64 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}s_imp_def_vcc_split_i64_add_1:
; SI: s_add_u32 {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
; SI: s_addc_u32 {{s[0-9]+}}, 0x1869f, {{s[0-9]+}}
define amdgpu_kernel void @s_imp_def_vcc_split_i64_add_1(i64 addrspace(1)* %out, i32 %val0, i64 %val1) {
  %vec.0 = insertelement <2 x i32> undef, i32 %val0, i32 0
  %vec.1 = insertelement <2 x i32> %vec.0, i32 99999, i32 1
  %bc = bitcast <2 x i32> %vec.1 to i64
  %add = add i64 %bc, %val1
  store i64 %add, i64 addrspace(1)* %out, align 8
  ret void
}

; Doesn't use constants
; FUNC-LABEL: {{^}}imp_def_vcc_split_i64_add_2:
; SI: v_add_i32_e32 {{v[0-9]+}}, vcc, {{s[0-9]+}}, {{v[0-9]+}}
; SI: v_addc_u32_e32 {{v[0-9]+}}, vcc, {{v[0-9]+}}, {{v[0-9]+}}, vcc
define amdgpu_kernel void @imp_def_vcc_split_i64_add_2(i64 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %val0, i64 %val1) {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() readnone
  %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
  %load = load i32, i32 addrspace(1)* %gep
  %vec.0 = insertelement <2 x i32> undef, i32 %val0, i32 0
  %vec.1 = insertelement <2 x i32> %vec.0, i32 %load, i32 1
  %bc = bitcast <2 x i32> %vec.1 to i64
  %add = add i64 %bc, %val1
  store i64 %add, i64 addrspace(1)* %out, align 8
  ret void
}
AMDGPU: Stop assuming vreg for build_vector This was causing a variety of test failures when v2i64 is added as a legal type. SIFixSGPRCopies should correctly handle the case of vector inputs to a scalar reg_sequence, so this isn't necessary anymore. This was hiding some deficiencies in how reg_sequence is handled later, but this shouldn't be a problem anymore since the register class copy of a reg_sequence is now done before the reg_sequence. llvm-svn: 251860 2015-11-03 00:30:48 +01:00			`; RUN: llc -march=amdgcn -verify-machineinstrs < %s \| FileCheck -check-prefix=SI -check-prefix=FUNC %s`
R600/SI: Add failing testcase. This is broken when 64-bit add is only partially moved to the VALU. llvm-svn: 216933 2014-09-02 21:12:31 +02:00
AMDGPU: Remove some old intrinsic uses from tests llvm-svn: 260493 2016-02-11 07:02:01 +01:00			`declare i32 @llvm.amdgcn.workitem.id.x() readnone`
R600/SI: Add failing testcase. This is broken when 64-bit add is only partially moved to the VALU. llvm-svn: 216933 2014-09-02 21:12:31 +02:00
			`; This is broken because the low half of the 64-bit add remains on the`
			`; SALU, but the upper half does not. The addc expects the carry bit`
			`; set in vcc, which is undefined since the low scalar half add sets`
			`; scc instead.`

AMDGPU: Replace i64 add/sub lowering Use VOP3 add/addc like usual. This has some tradeoffs. Inline immediates fold a little better, but other constants are worse off. SIShrinkInstructions could be made smarter to handle these cases. This allows us to avoid selecting scalar adds where we need to track the carry in scc and replace its users. This makes it easier to use the carryless VALU adds. llvm-svn: 318340 2017-11-15 22:51:43 +01:00			`; FIXME: SIShrinkInstructions should force immediate fold.`

R600: Call EmitFunctionHeader() in the AsmPrinter to populate the ELF symbol table llvm-svn: 218776 2014-10-01 19:15:17 +02:00			`; FUNC-LABEL: {{^}}imp_def_vcc_split_i64_add_0:`
AMDGPU: Replace i64 add/sub lowering Use VOP3 add/addc like usual. This has some tradeoffs. Inline immediates fold a little better, but other constants are worse off. SIShrinkInstructions could be made smarter to handle these cases. This allows us to avoid selecting scalar adds where we need to track the carry in scc and replace its users. This makes it easier to use the carryless VALU adds. llvm-svn: 318340 2017-11-15 22:51:43 +01:00			`; SI: s_movk_i32 [[K:s[0-9]+]], 0x18f`
			`; SI: v_add_i32_e32 v{{[0-9]+}}, vcc, [[K]], v{{[0-9]+}}`
AMDGPU: Stop assuming vreg for build_vector This was causing a variety of test failures when v2i64 is added as a legal type. SIFixSGPRCopies should correctly handle the case of vector inputs to a scalar reg_sequence, so this isn't necessary anymore. This was hiding some deficiencies in how reg_sequence is handled later, but this shouldn't be a problem anymore since the register class copy of a reg_sequence is now done before the reg_sequence. llvm-svn: 251860 2015-11-03 00:30:48 +01:00			`; SI: v_addc_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-21 22:39:51 +01:00			`define amdgpu_kernel void @imp_def_vcc_split_i64_add_0(i64 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %s.val) {`
AMDGPU: Stop assuming vreg for build_vector This was causing a variety of test failures when v2i64 is added as a legal type. SIFixSGPRCopies should correctly handle the case of vector inputs to a scalar reg_sequence, so this isn't necessary anymore. This was hiding some deficiencies in how reg_sequence is handled later, but this shouldn't be a problem anymore since the register class copy of a reg_sequence is now done before the reg_sequence. llvm-svn: 251860 2015-11-03 00:30:48 +01:00			`%v.val = load volatile i32, i32 addrspace(1)* %in`
			`%vec.0 = insertelement <2 x i32> undef, i32 %s.val, i32 0`
			`%vec.1 = insertelement <2 x i32> %vec.0, i32 %v.val, i32 1`
			`%bc = bitcast <2 x i32> %vec.1 to i64`
			`%add = add i64 %bc, 399`
			`store i64 %add, i64 addrspace(1)* %out, align 8`
			`ret void`
			`}`

			`; FUNC-LABEL: {{^}}s_imp_def_vcc_split_i64_add_0:`
			`; SI: s_add_u32 {{s[0-9]+}}, {{s[0-9]+}}, 0x18f`
			`; SI: s_addc_u32 {{s[0-9]+}}, 0xf423f, 0`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-21 22:39:51 +01:00			`define amdgpu_kernel void @s_imp_def_vcc_split_i64_add_0(i64 addrspace(1)* %out, i32 %val) {`
R600/SI: Add failing testcase. This is broken when 64-bit add is only partially moved to the VALU. llvm-svn: 216933 2014-09-02 21:12:31 +02:00			`%vec.0 = insertelement <2 x i32> undef, i32 %val, i32 0`
			`%vec.1 = insertelement <2 x i32> %vec.0, i32 999999, i32 1`
			`%bc = bitcast <2 x i32> %vec.1 to i64`
			`%add = add i64 %bc, 399`
			`store i64 %add, i64 addrspace(1)* %out, align 8`
			`ret void`
			`}`

R600: Call EmitFunctionHeader() in the AsmPrinter to populate the ELF symbol table llvm-svn: 218776 2014-10-01 19:15:17 +02:00			`; FUNC-LABEL: {{^}}imp_def_vcc_split_i64_add_1:`
R600/SI: Move SIFixSGPRCopies to inst selector passes This should expose more of the actually used VALU instructions to the machine optimization passes. This also should help getting i1 handling into a better state. For not entirly understood reasons, this fixes the split-scalar-i64-add.ll test where a 64-bit add would only partially be moved to the VALU resulting in use of undefined VCC. llvm-svn: 222256 2014-11-18 22:06:58 +01:00			`; SI: v_add_i32`
			`; SI: v_addc_u32`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-21 22:39:51 +01:00			`define amdgpu_kernel void @imp_def_vcc_split_i64_add_1(i64 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %val0, i64 %val1) {`
AMDGPU: Stop assuming vreg for build_vector This was causing a variety of test failures when v2i64 is added as a legal type. SIFixSGPRCopies should correctly handle the case of vector inputs to a scalar reg_sequence, so this isn't necessary anymore. This was hiding some deficiencies in how reg_sequence is handled later, but this shouldn't be a problem anymore since the register class copy of a reg_sequence is now done before the reg_sequence. llvm-svn: 251860 2015-11-03 00:30:48 +01:00			`%v.val = load volatile i32, i32 addrspace(1)* %in`
			`%vec.0 = insertelement <2 x i32> undef, i32 %val0, i32 0`
			`%vec.1 = insertelement <2 x i32> %vec.0, i32 %v.val, i32 1`
			`%bc = bitcast <2 x i32> %vec.1 to i64`
			`%add = add i64 %bc, %val1`
			`store i64 %add, i64 addrspace(1)* %out, align 8`
			`ret void`
			`}`

			`; FUNC-LABEL: {{^}}s_imp_def_vcc_split_i64_add_1:`
			`; SI: s_add_u32 {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}`
			`; SI: s_addc_u32 {{s[0-9]+}}, 0x1869f, {{s[0-9]+}}`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-21 22:39:51 +01:00			`define amdgpu_kernel void @s_imp_def_vcc_split_i64_add_1(i64 addrspace(1)* %out, i32 %val0, i64 %val1) {`
R600/SI: Add failing testcase. This is broken when 64-bit add is only partially moved to the VALU. llvm-svn: 216933 2014-09-02 21:12:31 +02:00			`%vec.0 = insertelement <2 x i32> undef, i32 %val0, i32 0`
			`%vec.1 = insertelement <2 x i32> %vec.0, i32 99999, i32 1`
			`%bc = bitcast <2 x i32> %vec.1 to i64`
			`%add = add i64 %bc, %val1`
			`store i64 %add, i64 addrspace(1)* %out, align 8`
			`ret void`
			`}`

			`; Doesn't use constants`
AMDGPU: Stop assuming vreg for build_vector This was causing a variety of test failures when v2i64 is added as a legal type. SIFixSGPRCopies should correctly handle the case of vector inputs to a scalar reg_sequence, so this isn't necessary anymore. This was hiding some deficiencies in how reg_sequence is handled later, but this shouldn't be a problem anymore since the register class copy of a reg_sequence is now done before the reg_sequence. llvm-svn: 251860 2015-11-03 00:30:48 +01:00			`; FUNC-LABEL: {{^}}imp_def_vcc_split_i64_add_2:`
			`; SI: v_add_i32_e32 {{v[0-9]+}}, vcc, {{s[0-9]+}}, {{v[0-9]+}}`
			`; SI: v_addc_u32_e32 {{v[0-9]+}}, vcc, {{v[0-9]+}}, {{v[0-9]+}}, vcc`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-21 22:39:51 +01:00			`define amdgpu_kernel void @imp_def_vcc_split_i64_add_2(i64 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %val0, i64 %val1) {`
AMDGPU: Remove some old intrinsic uses from tests llvm-svn: 260493 2016-02-11 07:02:01 +01:00			`%tid = call i32 @llvm.amdgcn.workitem.id.x() readnone`
[opaque pointer type] Add textual IR support for explicit type parameter to getelementptr instruction One of several parallel first steps to remove the target type of pointers, replacing them with a single opaque pointer type. This adds an explicit type parameter to the gep instruction so that when the first parameter becomes an opaque pointer type, the type to gep through is still available to the instructions. * This doesn't modify gep operators, only instructions (operators will be handled separately) * Textual IR changes only. Bitcode (including upgrade) and changing the in-memory representation will be in separate changes. * geps of vectors are transformed as: getelementptr <4 x float> %x, ... ->getelementptr float, <4 x float> %x, ... Then, once the opaque pointer type is introduced, this will ultimately look like: getelementptr float, <4 x ptr> %x with the unambiguous interpretation that it is a vector of pointers to float. * address spaces remain on the pointer, not the type: getelementptr float addrspace(1)* %x ->getelementptr float, float addrspace(1)* %x Then, eventually: getelementptr float, ptr addrspace(1) %x Importantly, the massive amount of test case churn has been automated by same crappy python code. I had to manually update a few test cases that wouldn't fit the script's model (r228970,r229196,r229197,r229198). The python script just massages stdin and writes the result to stdout, I then wrapped that in a shell script to handle replacing files, then using the usual find+xargs to migrate all the files. update.py: import fileinput import sys import re ibrep = re.compile(r"(^.?[^%\w]getelementptr inbounds )(((?:<\d x )?)(.?)(\| addrspace\(\d\)) \(\|>)(?:$\| (?:%\|@\|null\|undef\|blockaddress\|getelementptr\|addrspacecast\|bitcast\|inttoptr\|\[\[[a-zA-Z]\|\{\{).$))") normrep = re.compile( r"(^.?[^%\w]getelementptr )(((?:<\d* x )?)(.?)(\| addrspace\(\d\)) \(\|>)(?:$\| (?:%\|@\|null\|undef\|blockaddress\|getelementptr\|addrspacecast\|bitcast\|inttoptr\|\[\[[a-zA-Z]\|\{\{).$))") def conv(match, line): if not match: return line line = match.groups()[0] if len(match.groups()[5]) == 0: line += match.groups()[2] line += match.groups()[3] line += ", " line += match.groups()[1] line += "\n" return line for line in sys.stdin: if line.find("getelementptr ") == line.find("getelementptr inbounds"): if line.find("getelementptr inbounds") != line.find("getelementptr inbounds ("): line = conv(re.match(ibrep, line), line) elif line.find("getelementptr ") != line.find("getelementptr ("): line = conv(re.match(normrep, line), line) sys.stdout.write(line) apply.sh: for name in "$@" do python3 `dirname "$0"`/update.py < "$name" > "$name.tmp" && mv "$name.tmp" "$name" rm -f "$name.tmp" done The actual commands: From llvm/src: find test/ -name .ll \| xargs ./apply.sh From llvm/src/tools/clang: find test/ -name .mm -o -name .m -o -name .cpp -o -name .c \| xargs -I '{}' ../../apply.sh "{}" From llvm/src/tools/polly: find test/ -name *.ll \| xargs ./apply.sh After that, check-all (with llvm, clang, clang-tools-extra, lld, compiler-rt, and polly all checked out). The extra 'rm' in the apply.sh script is due to a few files in clang's test suite using interesting unicode stuff that my python script was throwing exceptions on. None of those files needed to be migrated, so it seemed sufficient to ignore those cases. Reviewers: rafael, dexonsmith, grosser Differential Revision: http://reviews.llvm.org/D7636 llvm-svn: 230786 2015-02-27 20:29:02 +01:00			`%gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid`
[opaque pointer type] Add textual IR support for explicit type parameter to load instruction Essentially the same as the GEP change in r230786. A similar migration script can be used to update test cases, though a few more test case improvements/changes were required this time around: (r229269-r229278) import fileinput import sys import re pat = re.compile(r"((?:=\|:\|^)\sload (?:atomic )?(?:volatile )?(.?))(\| addrspace\(\d+\) )\($\| (?:%\|@\|null\|undef\|blockaddress\|getelementptr\|addrspacecast\|bitcast\|inttoptr\|\[\[[a-zA-Z]\|\{\{).$)") for line in sys.stdin: sys.stdout.write(re.sub(pat, r"\1, \2\3*\4", line)) Reviewers: rafael, dexonsmith, grosser Differential Revision: http://reviews.llvm.org/D7649 llvm-svn: 230794 2015-02-27 22:17:42 +01:00			`%load = load i32, i32 addrspace(1)* %gep`
R600/SI: Add failing testcase. This is broken when 64-bit add is only partially moved to the VALU. llvm-svn: 216933 2014-09-02 21:12:31 +02:00			`%vec.0 = insertelement <2 x i32> undef, i32 %val0, i32 0`
			`%vec.1 = insertelement <2 x i32> %vec.0, i32 %load, i32 1`
			`%bc = bitcast <2 x i32> %vec.1 to i64`
			`%add = add i64 %bc, %val1`
			`store i64 %add, i64 addrspace(1)* %out, align 8`
			`ret void`
			`}`