mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-23 04:52:54 +02:00
334df8b0bc
This was introducing unnecessary padding after the explicit arguments, depending on the alignment of the total struct type. Also has the side effect of avoiding creating an extra GEP for the offset from the base kernel argument to the explicit kernel argument offset. llvm-svn: 335999
1449 lines
106 KiB
LLVM
1449 lines
106 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
|
; FIXME: Manually added checks for metadata nodes at bottom
|
|
; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -o - -amdgpu-lower-kernel-arguments %s | FileCheck -check-prefix=HSA %s
|
|
; RUN: opt -mtriple=amdgcn-- -S -o - -amdgpu-lower-kernel-arguments %s | FileCheck -check-prefix=MESA %s
|
|
|
|
define amdgpu_kernel void @kern_noargs() {
|
|
; HSA-LABEL: @kern_noargs(
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_noargs(
|
|
; MESA-NEXT: ret void
|
|
;
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_i8(i8 %arg) #0 {
|
|
; HSA-LABEL: @kern_i8(
|
|
; HSA-NEXT: [[KERN_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I8_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
|
|
; HSA-NEXT: store i8 [[TMP2]], i8 addrspace(1)* undef, align 1
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_i8(
|
|
; MESA-NEXT: [[KERN_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I8_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
|
|
; MESA-NEXT: store i8 [[TMP2]], i8 addrspace(1)* undef, align 1
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store i8 %arg, i8 addrspace(1)* undef, align 1
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_i16(i16 %arg) #0 {
|
|
; HSA-LABEL: @kern_i16(
|
|
; HSA-NEXT: [[KERN_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I16_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
|
|
; HSA-NEXT: store i16 [[TMP2]], i16 addrspace(1)* undef, align 1
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_i16(
|
|
; MESA-NEXT: [[KERN_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I16_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
|
|
; MESA-NEXT: store i16 [[TMP2]], i16 addrspace(1)* undef, align 1
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store i16 %arg, i16 addrspace(1)* undef, align 1
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_f16(half %arg) #0 {
|
|
; HSA-LABEL: @kern_f16(
|
|
; HSA-NEXT: [[KERN_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_F16_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
|
|
; HSA-NEXT: [[ARG_LOAD:%.*]] = bitcast i16 [[TMP2]] to half
|
|
; HSA-NEXT: store half [[ARG_LOAD]], half addrspace(1)* undef, align 1
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_f16(
|
|
; MESA-NEXT: [[KERN_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_F16_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
|
|
; MESA-NEXT: [[ARG_LOAD:%.*]] = bitcast i16 [[TMP2]] to half
|
|
; MESA-NEXT: store half [[ARG_LOAD]], half addrspace(1)* undef, align 1
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store half %arg, half addrspace(1)* undef, align 1
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_zeroext_i8(i8 zeroext %arg) #0 {
|
|
; HSA-LABEL: @kern_zeroext_i8(
|
|
; HSA-NEXT: [[KERN_ZEROEXT_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_ZEROEXT_I8_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
|
|
; HSA-NEXT: store i8 [[TMP2]], i8 addrspace(1)* undef, align 1
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_zeroext_i8(
|
|
; MESA-NEXT: [[KERN_ZEROEXT_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_ZEROEXT_I8_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !range !1, !invariant.load !0
|
|
; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
|
|
; MESA-NEXT: store i8 [[TMP2]], i8 addrspace(1)* undef, align 1
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store i8 %arg, i8 addrspace(1)* undef, align 1
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_zeroext_i16(i16 zeroext %arg) #0 {
|
|
; HSA-LABEL: @kern_zeroext_i16(
|
|
; HSA-NEXT: [[KERN_ZEROEXT_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_ZEROEXT_I16_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
|
|
; HSA-NEXT: store i16 [[TMP2]], i16 addrspace(1)* undef, align 1
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_zeroext_i16(
|
|
; MESA-NEXT: [[KERN_ZEROEXT_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_ZEROEXT_I16_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !range !2, !invariant.load !0
|
|
; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
|
|
; MESA-NEXT: store i16 [[TMP2]], i16 addrspace(1)* undef, align 1
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store i16 %arg, i16 addrspace(1)* undef, align 1
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_signext_i8(i8 signext %arg) #0 {
|
|
; HSA-LABEL: @kern_signext_i8(
|
|
; HSA-NEXT: [[KERN_SIGNEXT_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_SIGNEXT_I8_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
|
|
; HSA-NEXT: store i8 [[TMP2]], i8 addrspace(1)* undef, align 1
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_signext_i8(
|
|
; MESA-NEXT: [[KERN_SIGNEXT_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_SIGNEXT_I8_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !range !3, !invariant.load !0
|
|
; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
|
|
; MESA-NEXT: store i8 [[TMP2]], i8 addrspace(1)* undef, align 1
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store i8 %arg, i8 addrspace(1)* undef, align 1
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_signext_i16(i16 signext %arg) #0 {
|
|
; HSA-LABEL: @kern_signext_i16(
|
|
; HSA-NEXT: [[KERN_SIGNEXT_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_SIGNEXT_I16_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
|
|
; HSA-NEXT: store i16 [[TMP2]], i16 addrspace(1)* undef, align 1
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_signext_i16(
|
|
; MESA-NEXT: [[KERN_SIGNEXT_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_SIGNEXT_I16_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !range !4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
|
|
; MESA-NEXT: store i16 [[TMP2]], i16 addrspace(1)* undef, align 1
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store i16 %arg, i16 addrspace(1)* undef, align 1
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_i8_i8(i8 %arg0, i8 %arg1) {
|
|
; HSA-LABEL: @kern_i8_i8(
|
|
; HSA-NEXT: [[KERN_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I8_I8_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
|
|
; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I8_I8_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
|
|
; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
|
|
; HSA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef, align 1
|
|
; HSA-NEXT: store volatile i8 [[TMP5]], i8 addrspace(1)* undef, align 1
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_i8_i8(
|
|
; MESA-NEXT: [[KERN_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I8_I8_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
|
|
; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I8_I8_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
|
|
; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
|
|
; MESA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef, align 1
|
|
; MESA-NEXT: store volatile i8 [[TMP5]], i8 addrspace(1)* undef, align 1
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store volatile i8 %arg0, i8 addrspace(1)* undef, align 1
|
|
store volatile i8 %arg1, i8 addrspace(1)* undef, align 1
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_v3i8(<3 x i8> %arg) {
|
|
; HSA-LABEL: @kern_v3i8(
|
|
; HSA-NEXT: [[KERN_V3I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V3I8_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i24
|
|
; HSA-NEXT: [[ARG_LOAD:%.*]] = bitcast i24 [[TMP2]] to <3 x i8>
|
|
; HSA-NEXT: store <3 x i8> [[ARG_LOAD]], <3 x i8> addrspace(1)* undef, align 4
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_v3i8(
|
|
; MESA-NEXT: [[KERN_V3I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V3I8_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i24
|
|
; MESA-NEXT: [[ARG_LOAD:%.*]] = bitcast i24 [[TMP2]] to <3 x i8>
|
|
; MESA-NEXT: store <3 x i8> [[ARG_LOAD]], <3 x i8> addrspace(1)* undef, align 4
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store <3 x i8> %arg, <3 x i8> addrspace(1)* undef, align 4
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_i24(i24 %arg0) {
|
|
; HSA-LABEL: @kern_i24(
|
|
; HSA-NEXT: [[KERN_I24_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I24_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i24
|
|
; HSA-NEXT: store i24 [[TMP2]], i24 addrspace(1)* undef
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_i24(
|
|
; MESA-NEXT: [[KERN_I24_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I24_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i24
|
|
; MESA-NEXT: store i24 [[TMP2]], i24 addrspace(1)* undef
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store i24 %arg0, i24 addrspace(1)* undef
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_i32(i32 %arg0) {
|
|
; HSA-LABEL: @kern_i32(
|
|
; HSA-NEXT: [[KERN_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I32_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[ARG0_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: store i32 [[ARG0_LOAD]], i32 addrspace(1)* undef
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_i32(
|
|
; MESA-NEXT: [[KERN_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I32_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[ARG0_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: store i32 [[ARG0_LOAD]], i32 addrspace(1)* undef
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store i32 %arg0, i32 addrspace(1)* undef
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_f32(float %arg0) {
|
|
; HSA-LABEL: @kern_f32(
|
|
; HSA-NEXT: [[KERN_F32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_F32_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to float addrspace(4)*
|
|
; HSA-NEXT: [[ARG0_LOAD:%.*]] = load float, float addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: store float [[ARG0_LOAD]], float addrspace(1)* undef
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_f32(
|
|
; MESA-NEXT: [[KERN_F32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_F32_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to float addrspace(4)*
|
|
; MESA-NEXT: [[ARG0_LOAD:%.*]] = load float, float addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: store float [[ARG0_LOAD]], float addrspace(1)* undef
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store float %arg0, float addrspace(1)* undef
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_v3i32(<3 x i32> %arg0) {
|
|
; HSA-LABEL: @kern_v3i32(
|
|
; HSA-NEXT: [[KERN_V3I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(16) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V3I32_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to <3 x i32> addrspace(4)*
|
|
; HSA-NEXT: [[TMP1:%.*]] = bitcast <3 x i32> addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]] to <4 x i32> addrspace(4)*
|
|
; HSA-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32> addrspace(4)* [[TMP1]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[ARG0_LOAD:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
|
|
; HSA-NEXT: store <3 x i32> [[ARG0_LOAD]], <3 x i32> addrspace(1)* undef, align 4
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_v3i32(
|
|
; MESA-NEXT: [[KERN_V3I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(52) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V3I32_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to <3 x i32> addrspace(4)*
|
|
; MESA-NEXT: [[TMP1:%.*]] = bitcast <3 x i32> addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]] to <4 x i32> addrspace(4)*
|
|
; MESA-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32> addrspace(4)* [[TMP1]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[ARG0_LOAD:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
|
|
; MESA-NEXT: store <3 x i32> [[ARG0_LOAD]], <3 x i32> addrspace(1)* undef, align 4
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store <3 x i32> %arg0, <3 x i32> addrspace(1)* undef, align 4
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_v8i32(<8 x i32> %arg) #0 {
|
|
; HSA-LABEL: @kern_v8i32(
|
|
; HSA-NEXT: [[KERN_V8I32_KERNARG_SEGMENT:%.*]] = call nonnull align 32 dereferenceable(32) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V8I32_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET]] to <8 x i32> addrspace(4)*
|
|
; HSA-NEXT: [[ARG_LOAD:%.*]] = load <8 x i32>, <8 x i32> addrspace(4)* [[ARG_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: store <8 x i32> [[ARG_LOAD]], <8 x i32> addrspace(1)* undef
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_v8i32(
|
|
; MESA-NEXT: [[KERN_V8I32_KERNARG_SEGMENT:%.*]] = call nonnull align 32 dereferenceable(68) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V8I32_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET]] to <8 x i32> addrspace(4)*
|
|
; MESA-NEXT: [[ARG_LOAD:%.*]] = load <8 x i32>, <8 x i32> addrspace(4)* [[ARG_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: store <8 x i32> [[ARG_LOAD]], <8 x i32> addrspace(1)* undef
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store <8 x i32> %arg, <8 x i32> addrspace(1)* undef
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_v8i64(<8 x i64> %arg) #0 {
|
|
; HSA-LABEL: @kern_v8i64(
|
|
; HSA-NEXT: [[KERN_V8I64_KERNARG_SEGMENT:%.*]] = call nonnull align 64 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V8I64_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET]] to <8 x i64> addrspace(4)*
|
|
; HSA-NEXT: [[ARG_LOAD:%.*]] = load <8 x i64>, <8 x i64> addrspace(4)* [[ARG_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: store <8 x i64> [[ARG_LOAD]], <8 x i64> addrspace(1)* undef
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_v8i64(
|
|
; MESA-NEXT: [[KERN_V8I64_KERNARG_SEGMENT:%.*]] = call nonnull align 64 dereferenceable(100) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V8I64_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET]] to <8 x i64> addrspace(4)*
|
|
; MESA-NEXT: [[ARG_LOAD:%.*]] = load <8 x i64>, <8 x i64> addrspace(4)* [[ARG_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: store <8 x i64> [[ARG_LOAD]], <8 x i64> addrspace(1)* undef
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store <8 x i64> %arg, <8 x i64> addrspace(1)* undef
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_v16i64(<16 x i64> %arg) #0 {
|
|
; HSA-LABEL: @kern_v16i64(
|
|
; HSA-NEXT: [[KERN_V16I64_KERNARG_SEGMENT:%.*]] = call nonnull align 128 dereferenceable(128) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V16I64_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET]] to <16 x i64> addrspace(4)*
|
|
; HSA-NEXT: [[ARG_LOAD:%.*]] = load <16 x i64>, <16 x i64> addrspace(4)* [[ARG_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: store <16 x i64> [[ARG_LOAD]], <16 x i64> addrspace(1)* undef
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_v16i64(
|
|
; MESA-NEXT: [[KERN_V16I64_KERNARG_SEGMENT:%.*]] = call nonnull align 128 dereferenceable(164) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V16I64_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET]] to <16 x i64> addrspace(4)*
|
|
; MESA-NEXT: [[ARG_LOAD:%.*]] = load <16 x i64>, <16 x i64> addrspace(4)* [[ARG_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: store <16 x i64> [[ARG_LOAD]], <16 x i64> addrspace(1)* undef
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store <16 x i64> %arg, <16 x i64> addrspace(1)* undef
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_i32_v3i32(i32 %arg0, <3 x i32> %arg1) {
|
|
; HSA-LABEL: @kern_i32_v3i32(
|
|
; HSA-NEXT: [[KERN_I32_V3I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(32) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I32_V3I32_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[ARG0_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I32_V3I32_KERNARG_SEGMENT]], i64 16
|
|
; HSA-NEXT: [[ARG1_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET]] to <3 x i32> addrspace(4)*
|
|
; HSA-NEXT: [[TMP1:%.*]] = bitcast <3 x i32> addrspace(4)* [[ARG1_KERNARG_OFFSET_CAST]] to <4 x i32> addrspace(4)*
|
|
; HSA-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32> addrspace(4)* [[TMP1]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[ARG1_LOAD:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
|
|
; HSA-NEXT: store i32 [[ARG0_LOAD]], i32 addrspace(1)* undef
|
|
; HSA-NEXT: store <3 x i32> [[ARG1_LOAD]], <3 x i32> addrspace(1)* undef, align 4
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_i32_v3i32(
|
|
; MESA-NEXT: [[KERN_I32_V3I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(68) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I32_V3I32_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[ARG0_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I32_V3I32_KERNARG_SEGMENT]], i64 52
|
|
; MESA-NEXT: [[ARG1_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET]] to <3 x i32> addrspace(4)*
|
|
; MESA-NEXT: [[TMP1:%.*]] = bitcast <3 x i32> addrspace(4)* [[ARG1_KERNARG_OFFSET_CAST]] to <4 x i32> addrspace(4)*
|
|
; MESA-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32> addrspace(4)* [[TMP1]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[ARG1_LOAD:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
|
|
; MESA-NEXT: store i32 [[ARG0_LOAD]], i32 addrspace(1)* undef
|
|
; MESA-NEXT: store <3 x i32> [[ARG1_LOAD]], <3 x i32> addrspace(1)* undef, align 4
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store i32 %arg0, i32 addrspace(1)* undef
|
|
store <3 x i32> %arg1, <3 x i32> addrspace(1)* undef, align 4
|
|
ret void
|
|
}
|
|
|
|
%struct.a = type { i32, i8, [4 x i8] }
|
|
%struct.b.packed = type { i8, i32, [3 x i16], <2 x double> }
|
|
|
|
define amdgpu_kernel void @kern_struct_a(%struct.a %arg0) {
|
|
; HSA-LABEL: @kern_struct_a(
|
|
; HSA-NEXT: [[KERN_STRUCT_A_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(12) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_STRUCT_A_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to [[STRUCT_A:%.*]] addrspace(4)*
|
|
; HSA-NEXT: [[ARG0_LOAD:%.*]] = load [[STRUCT_A]], [[STRUCT_A]] addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: store [[STRUCT_A]] %arg0.load, [[STRUCT_A]] addrspace(1)* undef
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_struct_a(
|
|
; MESA-NEXT: [[KERN_STRUCT_A_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(48) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_STRUCT_A_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to [[STRUCT_A:%.*]] addrspace(4)*
|
|
; MESA-NEXT: [[ARG0_LOAD:%.*]] = load [[STRUCT_A]], [[STRUCT_A]] addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: store [[STRUCT_A]] %arg0.load, [[STRUCT_A]] addrspace(1)* undef
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store %struct.a %arg0, %struct.a addrspace(1)* undef
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_struct_b_packed(%struct.b.packed %arg0) #0 {
|
|
; HSA-LABEL: @kern_struct_b_packed(
|
|
; HSA-NEXT: [[KERN_STRUCT_B_PACKED_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(32) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_STRUCT_B_PACKED_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to [[STRUCT_B_PACKED:%.*]] addrspace(4)*
|
|
; HSA-NEXT: [[ARG0_LOAD:%.*]] = load [[STRUCT_B_PACKED]], [[STRUCT_B_PACKED]] addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: store [[STRUCT_B_PACKED]] %arg0.load, [[STRUCT_B_PACKED]] addrspace(1)* undef
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_struct_b_packed(
|
|
; MESA-NEXT: [[KERN_STRUCT_B_PACKED_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(68) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_STRUCT_B_PACKED_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to [[STRUCT_B_PACKED:%.*]] addrspace(4)*
|
|
; MESA-NEXT: [[ARG0_LOAD:%.*]] = load [[STRUCT_B_PACKED]], [[STRUCT_B_PACKED]] addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: store [[STRUCT_B_PACKED]] %arg0.load, [[STRUCT_B_PACKED]] addrspace(1)* undef
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store %struct.b.packed %arg0, %struct.b.packed addrspace(1)* undef
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_implicit_arg_num_bytes(i32 %arg0) #1 {
|
|
; HSA-LABEL: @kern_implicit_arg_num_bytes(
|
|
; HSA-NEXT: [[KERN_IMPLICIT_ARG_NUM_BYTES_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(48) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_IMPLICIT_ARG_NUM_BYTES_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[ARG0_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: store i32 [[ARG0_LOAD]], i32 addrspace(1)* undef
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_implicit_arg_num_bytes(
|
|
; MESA-NEXT: [[KERN_IMPLICIT_ARG_NUM_BYTES_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_IMPLICIT_ARG_NUM_BYTES_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[ARG0_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: store i32 [[ARG0_LOAD]], i32 addrspace(1)* undef
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store i32 %arg0, i32 addrspace(1)* undef
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kernel_implicitarg_no_struct_align(<16 x i32>, i32 %arg1) #1 {
|
|
; HSA-LABEL: @kernel_implicitarg_no_struct_align(
|
|
; HSA-NEXT: [[KERNEL_IMPLICITARG_NO_STRUCT_ALIGN_KERNARG_SEGMENT:%.*]] = call nonnull align 64 dereferenceable(112) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERNEL_IMPLICITARG_NO_STRUCT_ALIGN_KERNARG_SEGMENT]], i64 64
|
|
; HSA-NEXT: [[ARG1_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[ARG1_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: store i32 [[ARG1_LOAD]], i32 addrspace(1)* undef
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kernel_implicitarg_no_struct_align(
|
|
; MESA-NEXT: [[KERNEL_IMPLICITARG_NO_STRUCT_ALIGN_KERNARG_SEGMENT:%.*]] = call nonnull align 64 dereferenceable(108) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERNEL_IMPLICITARG_NO_STRUCT_ALIGN_KERNARG_SEGMENT]], i64 100
|
|
; MESA-NEXT: [[ARG1_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[ARG1_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: store i32 [[ARG1_LOAD]], i32 addrspace(1)* undef
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store i32 %arg1, i32 addrspace(1)* undef
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_lds_ptr(i32 addrspace(3)* %lds) #0 {
|
|
; HSA-LABEL: @kern_lds_ptr(
|
|
; HSA-NEXT: [[KERN_LDS_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[LDS_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_LDS_PTR_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[LDS_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[LDS_KERNARG_OFFSET]] to i32 addrspace(3)* addrspace(4)*
|
|
; HSA-NEXT: [[LDS_LOAD:%.*]] = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(4)* [[LDS_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: store i32 0, i32 addrspace(3)* [[LDS_LOAD]], align 4
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_lds_ptr(
|
|
; MESA-NEXT: [[KERN_LDS_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[LDS_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_LDS_PTR_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[LDS_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[LDS_KERNARG_OFFSET]] to i32 addrspace(3)* addrspace(4)*
|
|
; MESA-NEXT: [[LDS_LOAD:%.*]] = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(4)* [[LDS_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: store i32 0, i32 addrspace(3)* [[LDS_LOAD]], align 4
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store i32 0, i32 addrspace(3)* %lds, align 4
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_lds_ptr_si(i32 addrspace(3)* %lds) #2 {
|
|
; HSA-LABEL: @kern_lds_ptr_si(
|
|
; HSA-NEXT: [[KERN_LDS_PTR_SI_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: store i32 0, i32 addrspace(3)* [[LDS:%.*]], align 4
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_lds_ptr_si(
|
|
; MESA-NEXT: [[KERN_LDS_PTR_SI_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: store i32 0, i32 addrspace(3)* [[LDS:%.*]], align 4
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store i32 0, i32 addrspace(3)* %lds, align 4
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_realign_i8_i8(i8 %arg0, i8 %arg1) #0 {
|
|
; HSA-LABEL: @kern_realign_i8_i8(
|
|
; HSA-NEXT: [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
|
|
; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
|
|
; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
|
|
; HSA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef
|
|
; HSA-NEXT: store volatile i8 [[TMP5]], i8 addrspace(1)* undef
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_realign_i8_i8(
|
|
; MESA-NEXT: [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
|
|
; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
|
|
; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
|
|
; MESA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef
|
|
; MESA-NEXT: store volatile i8 [[TMP5]], i8 addrspace(1)* undef
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store volatile i8 %arg0, i8 addrspace(1)* undef
|
|
store volatile i8 %arg1, i8 addrspace(1)* undef
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_realign_i8_i8_i8(i8 %arg0, i8 %arg1, i8 %arg2) #0 {
|
|
; HSA-LABEL: @kern_realign_i8_i8_i8(
|
|
; HSA-NEXT: [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
|
|
; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
|
|
; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
|
|
; HSA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP6:%.*]] = load i32, i32 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16
|
|
; HSA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i8
|
|
; HSA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef
|
|
; HSA-NEXT: store volatile i8 [[TMP5]], i8 addrspace(1)* undef
|
|
; HSA-NEXT: store volatile i8 [[TMP8]], i8 addrspace(1)* undef
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_realign_i8_i8_i8(
|
|
; MESA-NEXT: [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
|
|
; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
|
|
; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
|
|
; MESA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP6:%.*]] = load i32, i32 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16
|
|
; MESA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i8
|
|
; MESA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef
|
|
; MESA-NEXT: store volatile i8 [[TMP5]], i8 addrspace(1)* undef
|
|
; MESA-NEXT: store volatile i8 [[TMP8]], i8 addrspace(1)* undef
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store volatile i8 %arg0, i8 addrspace(1)* undef
|
|
store volatile i8 %arg1, i8 addrspace(1)* undef
|
|
store volatile i8 %arg2, i8 addrspace(1)* undef
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_realign_i8_i8_i8_i8(i8 %arg0, i8 %arg1, i8 %arg2, i8 %arg3) #0 {
|
|
; HSA-LABEL: @kern_realign_i8_i8_i8_i8(
|
|
; HSA-NEXT: [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
|
|
; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
|
|
; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
|
|
; HSA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP6:%.*]] = load i32, i32 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16
|
|
; HSA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i8
|
|
; HSA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG3_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP9:%.*]] = load i32, i32 addrspace(4)* [[ARG3_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP10:%.*]] = lshr i32 [[TMP9]], 24
|
|
; HSA-NEXT: [[TMP11:%.*]] = trunc i32 [[TMP10]] to i8
|
|
; HSA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef
|
|
; HSA-NEXT: store volatile i8 [[TMP5]], i8 addrspace(1)* undef
|
|
; HSA-NEXT: store volatile i8 [[TMP8]], i8 addrspace(1)* undef
|
|
; HSA-NEXT: store volatile i8 [[TMP11]], i8 addrspace(1)* undef
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_realign_i8_i8_i8_i8(
|
|
; MESA-NEXT: [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
|
|
; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
|
|
; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
|
|
; MESA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP6:%.*]] = load i32, i32 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16
|
|
; MESA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i8
|
|
; MESA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG3_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP9:%.*]] = load i32, i32 addrspace(4)* [[ARG3_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP10:%.*]] = lshr i32 [[TMP9]], 24
|
|
; MESA-NEXT: [[TMP11:%.*]] = trunc i32 [[TMP10]] to i8
|
|
; MESA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef
|
|
; MESA-NEXT: store volatile i8 [[TMP5]], i8 addrspace(1)* undef
|
|
; MESA-NEXT: store volatile i8 [[TMP8]], i8 addrspace(1)* undef
|
|
; MESA-NEXT: store volatile i8 [[TMP11]], i8 addrspace(1)* undef
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store volatile i8 %arg0, i8 addrspace(1)* undef
|
|
store volatile i8 %arg1, i8 addrspace(1)* undef
|
|
store volatile i8 %arg2, i8 addrspace(1)* undef
|
|
store volatile i8 %arg3, i8 addrspace(1)* undef
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_realign_i8_v3i8(i8 %arg0, <3 x i8> %arg1) #0 {
|
|
; HSA-LABEL: @kern_realign_i8_v3i8(
|
|
; HSA-NEXT: [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
|
|
; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT]], i64 4
|
|
; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; HSA-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i24
|
|
; HSA-NEXT: [[ARG1_LOAD:%.*]] = bitcast i24 [[TMP4]] to <3 x i8>
|
|
; HSA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef
|
|
; HSA-NEXT: store volatile <3 x i8> [[ARG1_LOAD]], <3 x i8> addrspace(1)* undef
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_realign_i8_v3i8(
|
|
; MESA-NEXT: [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
|
|
; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT]], i64 40
|
|
; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 8, !invariant.load !0
|
|
; MESA-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i24
|
|
; MESA-NEXT: [[ARG1_LOAD:%.*]] = bitcast i24 [[TMP4]] to <3 x i8>
|
|
; MESA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef
|
|
; MESA-NEXT: store volatile <3 x i8> [[ARG1_LOAD]], <3 x i8> addrspace(1)* undef
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store volatile i8 %arg0, i8 addrspace(1)* undef
|
|
store volatile <3 x i8> %arg1, <3 x i8> addrspace(1)* undef
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_realign_i8_i16(i8 %arg0, i16 %arg1) #0 {
|
|
; HSA-LABEL: @kern_realign_i8_i16(
|
|
; HSA-NEXT: [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
|
|
; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 16
|
|
; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
|
|
; HSA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef
|
|
; HSA-NEXT: store volatile i16 [[TMP5]], i16 addrspace(1)* undef
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_realign_i8_i16(
|
|
; MESA-NEXT: [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
|
|
; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 16
|
|
; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
|
|
; MESA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef
|
|
; MESA-NEXT: store volatile i16 [[TMP5]], i16 addrspace(1)* undef
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store volatile i8 %arg0, i8 addrspace(1)* undef
|
|
store volatile i16 %arg1, i16 addrspace(1)* undef
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_realign_i1_i1(i1 %arg0, i1 %arg1) #0 {
|
|
; HSA-LABEL: @kern_realign_i1_i1(
|
|
; HSA-NEXT: [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1
|
|
; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
|
|
; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i1
|
|
; HSA-NEXT: store volatile i1 [[TMP2]], i1 addrspace(1)* undef
|
|
; HSA-NEXT: store volatile i1 [[TMP5]], i1 addrspace(1)* undef
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_realign_i1_i1(
|
|
; MESA-NEXT: [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1
|
|
; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
|
|
; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i1
|
|
; MESA-NEXT: store volatile i1 [[TMP2]], i1 addrspace(1)* undef
|
|
; MESA-NEXT: store volatile i1 [[TMP5]], i1 addrspace(1)* undef
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store volatile i1 %arg0, i1 addrspace(1)* undef
|
|
store volatile i1 %arg1, i1 addrspace(1)* undef
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_realign_i1_i1_i1(i1 %arg0, i1 %arg1, i1 %arg2) #0 {
|
|
; HSA-LABEL: @kern_realign_i1_i1_i1(
|
|
; HSA-NEXT: [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1
|
|
; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
|
|
; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i1
|
|
; HSA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP6:%.*]] = load i32, i32 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16
|
|
; HSA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i1
|
|
; HSA-NEXT: store volatile i1 [[TMP2]], i1 addrspace(1)* undef
|
|
; HSA-NEXT: store volatile i1 [[TMP5]], i1 addrspace(1)* undef
|
|
; HSA-NEXT: store volatile i1 [[TMP8]], i1 addrspace(1)* undef
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_realign_i1_i1_i1(
|
|
; MESA-NEXT: [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1
|
|
; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
|
|
; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i1
|
|
; MESA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP6:%.*]] = load i32, i32 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16
|
|
; MESA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i1
|
|
; MESA-NEXT: store volatile i1 [[TMP2]], i1 addrspace(1)* undef
|
|
; MESA-NEXT: store volatile i1 [[TMP5]], i1 addrspace(1)* undef
|
|
; MESA-NEXT: store volatile i1 [[TMP8]], i1 addrspace(1)* undef
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store volatile i1 %arg0, i1 addrspace(1)* undef
|
|
store volatile i1 %arg1, i1 addrspace(1)* undef
|
|
store volatile i1 %arg2, i1 addrspace(1)* undef
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_realign_i1_i1_i1_i1(i1 %arg0, i1 %arg1, i1 %arg2, i1 %arg3) #0 {
|
|
; HSA-LABEL: @kern_realign_i1_i1_i1_i1(
|
|
; HSA-NEXT: [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1
|
|
; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
|
|
; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i1
|
|
; HSA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP6:%.*]] = load i32, i32 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16
|
|
; HSA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i1
|
|
; HSA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG3_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP9:%.*]] = load i32, i32 addrspace(4)* [[ARG3_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP10:%.*]] = lshr i32 [[TMP9]], 24
|
|
; HSA-NEXT: [[TMP11:%.*]] = trunc i32 [[TMP10]] to i1
|
|
; HSA-NEXT: store volatile i1 [[TMP2]], i1 addrspace(1)* undef
|
|
; HSA-NEXT: store volatile i1 [[TMP5]], i1 addrspace(1)* undef
|
|
; HSA-NEXT: store volatile i1 [[TMP8]], i1 addrspace(1)* undef
|
|
; HSA-NEXT: store volatile i1 [[TMP11]], i1 addrspace(1)* undef
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_realign_i1_i1_i1_i1(
|
|
; MESA-NEXT: [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1
|
|
; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
|
|
; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i1
|
|
; MESA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP6:%.*]] = load i32, i32 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16
|
|
; MESA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i1
|
|
; MESA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG3_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP9:%.*]] = load i32, i32 addrspace(4)* [[ARG3_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP10:%.*]] = lshr i32 [[TMP9]], 24
|
|
; MESA-NEXT: [[TMP11:%.*]] = trunc i32 [[TMP10]] to i1
|
|
; MESA-NEXT: store volatile i1 [[TMP2]], i1 addrspace(1)* undef
|
|
; MESA-NEXT: store volatile i1 [[TMP5]], i1 addrspace(1)* undef
|
|
; MESA-NEXT: store volatile i1 [[TMP8]], i1 addrspace(1)* undef
|
|
; MESA-NEXT: store volatile i1 [[TMP11]], i1 addrspace(1)* undef
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store volatile i1 %arg0, i1 addrspace(1)* undef
|
|
store volatile i1 %arg1, i1 addrspace(1)* undef
|
|
store volatile i1 %arg2, i1 addrspace(1)* undef
|
|
store volatile i1 %arg3, i1 addrspace(1)* undef
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_realign_i1_v3i1(i1 %arg0, <3 x i1> %arg1) #0 {
|
|
; HSA-LABEL: @kern_realign_i1_v3i1(
|
|
; HSA-NEXT: [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1
|
|
; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT]], i64 4
|
|
; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; HSA-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
|
|
; HSA-NEXT: [[ARG1_LOAD:%.*]] = bitcast i3 [[TMP4]] to <3 x i1>
|
|
; HSA-NEXT: store volatile i1 [[TMP2]], i1 addrspace(1)* undef
|
|
; HSA-NEXT: store volatile <3 x i1> [[ARG1_LOAD]], <3 x i1> addrspace(1)* undef
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_realign_i1_v3i1(
|
|
; MESA-NEXT: [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1
|
|
; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT]], i64 40
|
|
; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 8, !invariant.load !0
|
|
; MESA-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
|
|
; MESA-NEXT: [[ARG1_LOAD:%.*]] = bitcast i3 [[TMP4]] to <3 x i1>
|
|
; MESA-NEXT: store volatile i1 [[TMP2]], i1 addrspace(1)* undef
|
|
; MESA-NEXT: store volatile <3 x i1> [[ARG1_LOAD]], <3 x i1> addrspace(1)* undef
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store volatile i1 %arg0, i1 addrspace(1)* undef
|
|
store volatile <3 x i1> %arg1, <3 x i1> addrspace(1)* undef
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_realign_i1_i16(i1 %arg0, i16 %arg1) #0 {
|
|
; HSA-LABEL: @kern_realign_i1_i16(
|
|
; HSA-NEXT: [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1
|
|
; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 16
|
|
; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
|
|
; HSA-NEXT: store volatile i1 [[TMP2]], i1 addrspace(1)* undef
|
|
; HSA-NEXT: store volatile i16 [[TMP5]], i16 addrspace(1)* undef
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_realign_i1_i16(
|
|
; MESA-NEXT: [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1
|
|
; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 16
|
|
; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
|
|
; MESA-NEXT: store volatile i1 [[TMP2]], i1 addrspace(1)* undef
|
|
; MESA-NEXT: store volatile i16 [[TMP5]], i16 addrspace(1)* undef
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store volatile i1 %arg0, i1 addrspace(1)* undef
|
|
store volatile i16 %arg1, i16 addrspace(1)* undef
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_realign_i8_i8_i8_i8_i8_i8_i8_i8(i8 %arg0, i8 %arg1, i8 %arg2, i8 %arg3, i8 %arg4, i8 %arg5, i8 %arg6, i8 %arg7) #0 {
|
|
; HSA-LABEL: @kern_realign_i8_i8_i8_i8_i8_i8_i8_i8(
|
|
; HSA-NEXT: [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
|
|
; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
|
|
; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
|
|
; HSA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP6:%.*]] = load i32, i32 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16
|
|
; HSA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i8
|
|
; HSA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG3_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP9:%.*]] = load i32, i32 addrspace(4)* [[ARG3_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP10:%.*]] = lshr i32 [[TMP9]], 24
|
|
; HSA-NEXT: [[TMP11:%.*]] = trunc i32 [[TMP10]] to i8
|
|
; HSA-NEXT: [[ARG5_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 4
|
|
; HSA-NEXT: [[ARG5_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG5_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP12:%.*]] = load i32, i32 addrspace(4)* [[ARG5_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; HSA-NEXT: [[TMP13:%.*]] = lshr i32 [[TMP12]], 8
|
|
; HSA-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i8
|
|
; HSA-NEXT: [[ARG6_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 4
|
|
; HSA-NEXT: [[ARG6_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG6_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP15:%.*]] = load i32, i32 addrspace(4)* [[ARG6_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; HSA-NEXT: [[TMP16:%.*]] = lshr i32 [[TMP15]], 16
|
|
; HSA-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i8
|
|
; HSA-NEXT: [[ARG7_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 4
|
|
; HSA-NEXT: [[ARG7_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG7_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP18:%.*]] = load i32, i32 addrspace(4)* [[ARG7_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; HSA-NEXT: [[TMP19:%.*]] = lshr i32 [[TMP18]], 24
|
|
; HSA-NEXT: [[TMP20:%.*]] = trunc i32 [[TMP19]] to i8
|
|
; HSA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef
|
|
; HSA-NEXT: store volatile i8 [[TMP5]], i8 addrspace(1)* undef
|
|
; HSA-NEXT: store volatile i8 [[TMP8]], i8 addrspace(1)* undef
|
|
; HSA-NEXT: store volatile i8 [[TMP11]], i8 addrspace(1)* undef
|
|
; HSA-NEXT: store volatile i8 [[TMP14]], i8 addrspace(1)* undef
|
|
; HSA-NEXT: store volatile i8 [[TMP17]], i8 addrspace(1)* undef
|
|
; HSA-NEXT: store volatile i8 [[TMP20]], i8 addrspace(1)* undef
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_realign_i8_i8_i8_i8_i8_i8_i8_i8(
|
|
; MESA-NEXT: [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
|
|
; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
|
|
; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
|
|
; MESA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP6:%.*]] = load i32, i32 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16
|
|
; MESA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i8
|
|
; MESA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG3_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP9:%.*]] = load i32, i32 addrspace(4)* [[ARG3_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP10:%.*]] = lshr i32 [[TMP9]], 24
|
|
; MESA-NEXT: [[TMP11:%.*]] = trunc i32 [[TMP10]] to i8
|
|
; MESA-NEXT: [[ARG5_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 40
|
|
; MESA-NEXT: [[ARG5_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG5_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP12:%.*]] = load i32, i32 addrspace(4)* [[ARG5_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 8, !invariant.load !0
|
|
; MESA-NEXT: [[TMP13:%.*]] = lshr i32 [[TMP12]], 8
|
|
; MESA-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i8
|
|
; MESA-NEXT: [[ARG6_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 40
|
|
; MESA-NEXT: [[ARG6_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG6_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP15:%.*]] = load i32, i32 addrspace(4)* [[ARG6_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 8, !invariant.load !0
|
|
; MESA-NEXT: [[TMP16:%.*]] = lshr i32 [[TMP15]], 16
|
|
; MESA-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i8
|
|
; MESA-NEXT: [[ARG7_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 40
|
|
; MESA-NEXT: [[ARG7_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG7_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP18:%.*]] = load i32, i32 addrspace(4)* [[ARG7_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 8, !invariant.load !0
|
|
; MESA-NEXT: [[TMP19:%.*]] = lshr i32 [[TMP18]], 24
|
|
; MESA-NEXT: [[TMP20:%.*]] = trunc i32 [[TMP19]] to i8
|
|
; MESA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef
|
|
; MESA-NEXT: store volatile i8 [[TMP5]], i8 addrspace(1)* undef
|
|
; MESA-NEXT: store volatile i8 [[TMP8]], i8 addrspace(1)* undef
|
|
; MESA-NEXT: store volatile i8 [[TMP11]], i8 addrspace(1)* undef
|
|
; MESA-NEXT: store volatile i8 [[TMP14]], i8 addrspace(1)* undef
|
|
; MESA-NEXT: store volatile i8 [[TMP17]], i8 addrspace(1)* undef
|
|
; MESA-NEXT: store volatile i8 [[TMP20]], i8 addrspace(1)* undef
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store volatile i8 %arg0, i8 addrspace(1)* undef
|
|
store volatile i8 %arg1, i8 addrspace(1)* undef
|
|
store volatile i8 %arg2, i8 addrspace(1)* undef
|
|
store volatile i8 %arg3, i8 addrspace(1)* undef
|
|
store volatile i8 %arg5, i8 addrspace(1)* undef
|
|
store volatile i8 %arg6, i8 addrspace(1)* undef
|
|
store volatile i8 %arg7, i8 addrspace(1)* undef
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_realign_f16_f16(half %arg0, half %arg1) #0 {
|
|
; HSA-LABEL: @kern_realign_f16_f16(
|
|
; HSA-NEXT: [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
|
|
; HSA-NEXT: [[ARG0_LOAD:%.*]] = bitcast i16 [[TMP2]] to half
|
|
; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 16
|
|
; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
|
|
; HSA-NEXT: [[ARG1_LOAD:%.*]] = bitcast i16 [[TMP5]] to half
|
|
; HSA-NEXT: store volatile half [[ARG0_LOAD]], half addrspace(1)* undef
|
|
; HSA-NEXT: store volatile half [[ARG1_LOAD]], half addrspace(1)* undef
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_realign_f16_f16(
|
|
; MESA-NEXT: [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
|
|
; MESA-NEXT: [[ARG0_LOAD:%.*]] = bitcast i16 [[TMP2]] to half
|
|
; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 16
|
|
; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
|
|
; MESA-NEXT: [[ARG1_LOAD:%.*]] = bitcast i16 [[TMP5]] to half
|
|
; MESA-NEXT: store volatile half [[ARG0_LOAD]], half addrspace(1)* undef
|
|
; MESA-NEXT: store volatile half [[ARG1_LOAD]], half addrspace(1)* undef
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store volatile half %arg0, half addrspace(1)* undef
|
|
store volatile half %arg1, half addrspace(1)* undef
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_global_ptr(i8 addrspace(1)* %ptr) #0 {
|
|
; HSA-LABEL: @kern_global_ptr(
|
|
; HSA-NEXT: [[KERN_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_GLOBAL_PTR_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)*
|
|
; HSA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_global_ptr(
|
|
; MESA-NEXT: [[KERN_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_GLOBAL_PTR_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)*
|
|
; MESA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store volatile i8 addrspace(1)* %ptr, i8 addrspace(1)* addrspace(1)* undef
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_global_ptr_dereferencable(i8 addrspace(1)* dereferenceable(42) %ptr) #0 {
|
|
; HSA-LABEL: @kern_global_ptr_dereferencable(
|
|
; HSA-NEXT: [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)*
|
|
; HSA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0, !dereferenceable !1
|
|
; HSA-NEXT: store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_global_ptr_dereferencable(
|
|
; MESA-NEXT: [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)*
|
|
; MESA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0, !dereferenceable !5
|
|
; MESA-NEXT: store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store volatile i8 addrspace(1)* %ptr, i8 addrspace(1)* addrspace(1)* undef
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_global_ptr_dereferencable_or_null(i8 addrspace(1)* dereferenceable_or_null(128) %ptr) #0 {
|
|
; HSA-LABEL: @kern_global_ptr_dereferencable_or_null(
|
|
; HSA-NEXT: [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)*
|
|
; HSA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0, !dereferenceable_or_null !2
|
|
; HSA-NEXT: store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_global_ptr_dereferencable_or_null(
|
|
; MESA-NEXT: [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)*
|
|
; MESA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0, !dereferenceable_or_null !6
|
|
; MESA-NEXT: store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store volatile i8 addrspace(1)* %ptr, i8 addrspace(1)* addrspace(1)* undef
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_nonnull_global_ptr(i8 addrspace(1)* nonnull %ptr) #0 {
|
|
; HSA-LABEL: @kern_nonnull_global_ptr(
|
|
; HSA-NEXT: [[KERN_NONNULL_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_NONNULL_GLOBAL_PTR_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)*
|
|
; HSA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0, !nonnull !0
|
|
; HSA-NEXT: store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_nonnull_global_ptr(
|
|
; MESA-NEXT: [[KERN_NONNULL_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_NONNULL_GLOBAL_PTR_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)*
|
|
; MESA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0, !nonnull !0
|
|
; MESA-NEXT: store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store volatile i8 addrspace(1)* %ptr, i8 addrspace(1)* addrspace(1)* undef
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_align32_global_ptr(i8 addrspace(1)* align 1024 %ptr) #0 {
|
|
; HSA-LABEL: @kern_align32_global_ptr(
|
|
; HSA-NEXT: [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)*
|
|
; HSA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0, !align !3
|
|
; HSA-NEXT: store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_align32_global_ptr(
|
|
; MESA-NEXT: [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)*
|
|
; MESA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0, !align !7
|
|
; MESA-NEXT: store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store volatile i8 addrspace(1)* %ptr, i8 addrspace(1)* addrspace(1)* undef
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_noalias_global_ptr(i8 addrspace(1)* noalias %ptr) #0 {
|
|
; HSA-LABEL: @kern_noalias_global_ptr(
|
|
; HSA-NEXT: [[KERN_NOALIAS_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: store volatile i8 addrspace(1)* [[PTR:%.*]], i8 addrspace(1)* addrspace(1)* undef
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_noalias_global_ptr(
|
|
; MESA-NEXT: [[KERN_NOALIAS_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: store volatile i8 addrspace(1)* [[PTR:%.*]], i8 addrspace(1)* addrspace(1)* undef
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store volatile i8 addrspace(1)* %ptr, i8 addrspace(1)* addrspace(1)* undef
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_noalias_global_ptr_x2(i8 addrspace(1)* noalias %ptr0, i8 addrspace(1)* noalias %ptr1) #0 {
|
|
; HSA-LABEL: @kern_noalias_global_ptr_x2(
|
|
; HSA-NEXT: [[KERN_NOALIAS_GLOBAL_PTR_X2_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(16) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: store volatile i8 addrspace(1)* [[PTR0:%.*]], i8 addrspace(1)* addrspace(1)* undef
|
|
; HSA-NEXT: store volatile i8 addrspace(1)* [[PTR1:%.*]], i8 addrspace(1)* addrspace(1)* undef
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_noalias_global_ptr_x2(
|
|
; MESA-NEXT: [[KERN_NOALIAS_GLOBAL_PTR_X2_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(52) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: store volatile i8 addrspace(1)* [[PTR0:%.*]], i8 addrspace(1)* addrspace(1)* undef
|
|
; MESA-NEXT: store volatile i8 addrspace(1)* [[PTR1:%.*]], i8 addrspace(1)* addrspace(1)* undef
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store volatile i8 addrspace(1)* %ptr0, i8 addrspace(1)* addrspace(1)* undef
|
|
store volatile i8 addrspace(1)* %ptr1, i8 addrspace(1)* addrspace(1)* undef
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @struct_i8_i8_arg({i8, i8} %in) #0 {
|
|
; HSA-LABEL: @struct_i8_i8_arg(
|
|
; HSA-NEXT: entry:
|
|
; HSA-NEXT: [[STRUCT_I8_I8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[STRUCT_I8_I8_ARG_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[IN_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[IN_KERNARG_OFFSET]] to { i8, i8 } addrspace(4)*
|
|
; HSA-NEXT: [[IN_LOAD:%.*]] = load { i8, i8 }, { i8, i8 } addrspace(4)* [[IN_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[ELT0:%.*]] = extractvalue { i8, i8 } [[IN_LOAD]], 0
|
|
; HSA-NEXT: [[ELT1:%.*]] = extractvalue { i8, i8 } [[IN_LOAD]], 1
|
|
; HSA-NEXT: store volatile i8 [[ELT0]], i8 addrspace(1)* null, align 4
|
|
; HSA-NEXT: store volatile i8 [[ELT1]], i8 addrspace(1)* null, align 4
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @struct_i8_i8_arg(
|
|
; MESA-NEXT: entry:
|
|
; MESA-NEXT: [[STRUCT_I8_I8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[STRUCT_I8_I8_ARG_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[IN_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[IN_KERNARG_OFFSET]] to { i8, i8 } addrspace(4)*
|
|
; MESA-NEXT: [[IN_LOAD:%.*]] = load { i8, i8 }, { i8, i8 } addrspace(4)* [[IN_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[ELT0:%.*]] = extractvalue { i8, i8 } [[IN_LOAD]], 0
|
|
; MESA-NEXT: [[ELT1:%.*]] = extractvalue { i8, i8 } [[IN_LOAD]], 1
|
|
; MESA-NEXT: store volatile i8 [[ELT0]], i8 addrspace(1)* null, align 4
|
|
; MESA-NEXT: store volatile i8 [[ELT1]], i8 addrspace(1)* null, align 4
|
|
; MESA-NEXT: ret void
|
|
;
|
|
entry:
|
|
%elt0 = extractvalue {i8, i8} %in, 0
|
|
%elt1 = extractvalue {i8, i8} %in, 1
|
|
store volatile i8 %elt0, i8 addrspace(1)* null, align 4
|
|
store volatile i8 %elt1, i8 addrspace(1)* null, align 4
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @struct_i8_i16_arg({i8, i16} %in) #0 {
|
|
; HSA-LABEL: @struct_i8_i16_arg(
|
|
; HSA-NEXT: entry:
|
|
; HSA-NEXT: [[STRUCT_I8_I16_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[STRUCT_I8_I16_ARG_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[IN_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[IN_KERNARG_OFFSET]] to { i8, i16 } addrspace(4)*
|
|
; HSA-NEXT: [[IN_LOAD:%.*]] = load { i8, i16 }, { i8, i16 } addrspace(4)* [[IN_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[ELT0:%.*]] = extractvalue { i8, i16 } [[IN_LOAD]], 0
|
|
; HSA-NEXT: [[ELT1:%.*]] = extractvalue { i8, i16 } [[IN_LOAD]], 1
|
|
; HSA-NEXT: store volatile i8 [[ELT0]], i8 addrspace(1)* null, align 4
|
|
; HSA-NEXT: store volatile i16 [[ELT1]], i16 addrspace(1)* null, align 4
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @struct_i8_i16_arg(
|
|
; MESA-NEXT: entry:
|
|
; MESA-NEXT: [[STRUCT_I8_I16_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[STRUCT_I8_I16_ARG_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[IN_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[IN_KERNARG_OFFSET]] to { i8, i16 } addrspace(4)*
|
|
; MESA-NEXT: [[IN_LOAD:%.*]] = load { i8, i16 }, { i8, i16 } addrspace(4)* [[IN_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[ELT0:%.*]] = extractvalue { i8, i16 } [[IN_LOAD]], 0
|
|
; MESA-NEXT: [[ELT1:%.*]] = extractvalue { i8, i16 } [[IN_LOAD]], 1
|
|
; MESA-NEXT: store volatile i8 [[ELT0]], i8 addrspace(1)* null, align 4
|
|
; MESA-NEXT: store volatile i16 [[ELT1]], i16 addrspace(1)* null, align 4
|
|
; MESA-NEXT: ret void
|
|
;
|
|
entry:
|
|
%elt0 = extractvalue {i8, i16} %in, 0
|
|
%elt1 = extractvalue {i8, i16} %in, 1
|
|
store volatile i8 %elt0, i8 addrspace(1)* null, align 4
|
|
store volatile i16 %elt1, i16 addrspace(1)* null, align 4
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @array_2xi8_arg([2 x i8] %in) #0 {
|
|
; HSA-LABEL: @array_2xi8_arg(
|
|
; HSA-NEXT: entry:
|
|
; HSA-NEXT: [[ARRAY_2XI8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[ARRAY_2XI8_ARG_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[IN_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[IN_KERNARG_OFFSET]] to [2 x i8] addrspace(4)*
|
|
; HSA-NEXT: [[IN_LOAD:%.*]] = load [2 x i8], [2 x i8] addrspace(4)* [[IN_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[ELT0:%.*]] = extractvalue [2 x i8] [[IN_LOAD]], 0
|
|
; HSA-NEXT: [[ELT1:%.*]] = extractvalue [2 x i8] [[IN_LOAD]], 1
|
|
; HSA-NEXT: store volatile i8 [[ELT0]], i8 addrspace(1)* null, align 4
|
|
; HSA-NEXT: store volatile i8 [[ELT1]], i8 addrspace(1)* null, align 4
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @array_2xi8_arg(
|
|
; MESA-NEXT: entry:
|
|
; MESA-NEXT: [[ARRAY_2XI8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[ARRAY_2XI8_ARG_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[IN_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[IN_KERNARG_OFFSET]] to [2 x i8] addrspace(4)*
|
|
; MESA-NEXT: [[IN_LOAD:%.*]] = load [2 x i8], [2 x i8] addrspace(4)* [[IN_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[ELT0:%.*]] = extractvalue [2 x i8] [[IN_LOAD]], 0
|
|
; MESA-NEXT: [[ELT1:%.*]] = extractvalue [2 x i8] [[IN_LOAD]], 1
|
|
; MESA-NEXT: store volatile i8 [[ELT0]], i8 addrspace(1)* null, align 4
|
|
; MESA-NEXT: store volatile i8 [[ELT1]], i8 addrspace(1)* null, align 4
|
|
; MESA-NEXT: ret void
|
|
;
|
|
entry:
|
|
%elt0 = extractvalue [2 x i8] %in, 0
|
|
%elt1 = extractvalue [2 x i8] %in, 1
|
|
store volatile i8 %elt0, i8 addrspace(1)* null, align 4
|
|
store volatile i8 %elt1, i8 addrspace(1)* null, align 4
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @array_2xi1_arg([2 x i1] %in) #0 {
|
|
; HSA-LABEL: @array_2xi1_arg(
|
|
; HSA-NEXT: entry:
|
|
; HSA-NEXT: [[ARRAY_2XI1_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[ARRAY_2XI1_ARG_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[IN_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[IN_KERNARG_OFFSET]] to [2 x i1] addrspace(4)*
|
|
; HSA-NEXT: [[IN_LOAD:%.*]] = load [2 x i1], [2 x i1] addrspace(4)* [[IN_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[ELT0:%.*]] = extractvalue [2 x i1] [[IN_LOAD]], 0
|
|
; HSA-NEXT: [[ELT1:%.*]] = extractvalue [2 x i1] [[IN_LOAD]], 1
|
|
; HSA-NEXT: store volatile i1 [[ELT0]], i1 addrspace(1)* null, align 4
|
|
; HSA-NEXT: store volatile i1 [[ELT1]], i1 addrspace(1)* null, align 4
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @array_2xi1_arg(
|
|
; MESA-NEXT: entry:
|
|
; MESA-NEXT: [[ARRAY_2XI1_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[ARRAY_2XI1_ARG_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[IN_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[IN_KERNARG_OFFSET]] to [2 x i1] addrspace(4)*
|
|
; MESA-NEXT: [[IN_LOAD:%.*]] = load [2 x i1], [2 x i1] addrspace(4)* [[IN_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[ELT0:%.*]] = extractvalue [2 x i1] [[IN_LOAD]], 0
|
|
; MESA-NEXT: [[ELT1:%.*]] = extractvalue [2 x i1] [[IN_LOAD]], 1
|
|
; MESA-NEXT: store volatile i1 [[ELT0]], i1 addrspace(1)* null, align 4
|
|
; MESA-NEXT: store volatile i1 [[ELT1]], i1 addrspace(1)* null, align 4
|
|
; MESA-NEXT: ret void
|
|
;
|
|
entry:
|
|
%elt0 = extractvalue [2 x i1] %in, 0
|
|
%elt1 = extractvalue [2 x i1] %in, 1
|
|
store volatile i1 %elt0, i1 addrspace(1)* null, align 4
|
|
store volatile i1 %elt1, i1 addrspace(1)* null, align 4
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @only_empty_struct({} %empty) #0 {
|
|
; HSA-LABEL: @only_empty_struct(
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @only_empty_struct(
|
|
; MESA-NEXT: [[ONLY_EMPTY_STRUCT_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(36) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: ret void
|
|
;
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @empty_struct_with_other({} %empty, i32 %arg1) #0 {
|
|
; HSA-LABEL: @empty_struct_with_other(
|
|
; HSA-NEXT: [[EMPTY_STRUCT_WITH_OTHER_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[EMPTY_STRUCT_WITH_OTHER_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG1_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[ARG1_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: store i32 [[ARG1_LOAD]], i32 addrspace(1)* undef
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @empty_struct_with_other(
|
|
; MESA-NEXT: [[EMPTY_STRUCT_WITH_OTHER_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[EMPTY_STRUCT_WITH_OTHER_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG1_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[ARG1_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: store i32 [[ARG1_LOAD]], i32 addrspace(1)* undef
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store i32 %arg1, i32 addrspace(1)* undef
|
|
ret void
|
|
}
|
|
|
|
attributes #0 = { nounwind "target-cpu"="kaveri" }
|
|
attributes #1 = { nounwind "target-cpu"="kaveri" "amdgpu-implicitarg-num-bytes"="40" }
|
|
attributes #2 = { nounwind "target-cpu"="tahiti" }
|
|
|
|
; HSA: 0 = !{}
|
|
; HSA: !1 = !{i64 42}
|
|
; HSA: !2 = !{i64 128}
|
|
; HSA: !3 = !{i64 1024}
|
|
|
|
|
|
; MESA: !0 = !{}
|
|
; MESA: !1 = !{i32 0, i32 256}
|
|
; MESA: !2 = !{i32 0, i32 65536}
|
|
; MESA: !3 = !{i32 -128, i32 128}
|
|
; MESA: !4 = !{i32 -32768, i32 32768}
|
|
; MESA: !5 = !{i64 42}
|
|
; MESA: !6 = !{i64 128}
|
|
; MESA: !7 = !{i64 1024}
|