; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -verify-machineinstrs -mattr=+simd128 | FileCheck %s

; Test SIMD loads and stores
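; Each lane-width section below exercises plain v128 loads and stores, splat
; loads, extending loads where the lane width supports them, and the rules for
; folding constant address offsets into the instruction's offset immediate.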
target triple = "wasm32-unknown-unknown"

; ==============================================================================
; 16 x i8
; ==============================================================================
define <16 x i8> @load_v16i8(<16 x i8>* %p) {
; CHECK-LABEL: load_v16i8:
; CHECK: .functype load_v16i8 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
  %v = load <16 x i8>, <16 x i8>* %p
  ret <16 x i8> %v
}
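; The insertelement-plus-shufflevector-to-all-zeros idiom below is the
; canonical splat pattern; it selects v128.load8_splat rather than a full
; 16-byte v128.load.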
define <16 x i8> @load_splat_v16i8(i8* %p) {
; CHECK-LABEL: load_splat_v16i8:
; CHECK: .functype load_splat_v16i8 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load8_splat 0
; CHECK-NEXT: # fallthrough-return
  %e = load i8, i8* %p
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}
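; In the "folded" tests below, the constant offset is computed with `add nuw`
; or an inbounds getelementptr, so it can be folded into the unsigned offset
; immediate of the load (e.g. `v128.load 16`).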
define <16 x i8> @load_v16i8_with_folded_offset(<16 x i8>* %p) {
; CHECK-LABEL: load_v16i8_with_folded_offset:
; CHECK: .functype load_v16i8_with_folded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 16
; CHECK-NEXT: # fallthrough-return
  %q = ptrtoint <16 x i8>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <16 x i8>*
  %v = load <16 x i8>, <16 x i8>* %s
  ret <16 x i8> %v
}

define <16 x i8> @load_splat_v16i8_with_folded_offset(i8* %p) {
; CHECK-LABEL: load_splat_v16i8_with_folded_offset:
; CHECK: .functype load_splat_v16i8_with_folded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load8_splat 16
; CHECK-NEXT: # fallthrough-return
  %q = ptrtoint i8* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to i8*
  %e = load i8, i8* %s
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}

define <16 x i8> @load_v16i8_with_folded_gep_offset(<16 x i8>* %p) {
; CHECK-LABEL: load_v16i8_with_folded_gep_offset:
; CHECK: .functype load_v16i8_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 16
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 1
  %v = load <16 x i8>, <16 x i8>* %s
  ret <16 x i8> %v
}

define <16 x i8> @load_splat_v16i8_with_folded_gep_offset(i8* %p) {
; CHECK-LABEL: load_splat_v16i8_with_folded_gep_offset:
; CHECK: .functype load_splat_v16i8_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load8_splat 1
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr inbounds i8, i8* %p, i32 1
  %e = load i8, i8* %s
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}
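; Negative offsets, and offsets not known to be non-wrapping (plain geps and
; `add nsw`), cannot be folded into the unsigned offset immediate, so they are
; materialized with an explicit i32.const + i32.add.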
define <16 x i8> @load_v16i8_with_unfolded_gep_negative_offset(<16 x i8>* %p) {
; CHECK-LABEL: load_v16i8_with_unfolded_gep_negative_offset:
; CHECK: .functype load_v16i8_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const -16
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 -1
  %v = load <16 x i8>, <16 x i8>* %s
  ret <16 x i8> %v
}

define <16 x i8> @load_splat_v16i8_with_unfolded_gep_negative_offset(i8* %p) {
; CHECK-LABEL: load_splat_v16i8_with_unfolded_gep_negative_offset:
; CHECK: .functype load_splat_v16i8_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const -1
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load8_splat 0
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr inbounds i8, i8* %p, i32 -1
  %e = load i8, i8* %s
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}

define <16 x i8> @load_v16i8_with_unfolded_offset(<16 x i8>* %p) {
; CHECK-LABEL: load_v16i8_with_unfolded_offset:
; CHECK: .functype load_v16i8_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
  %q = ptrtoint <16 x i8>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <16 x i8>*
  %v = load <16 x i8>, <16 x i8>* %s
  ret <16 x i8> %v
}

define <16 x i8> @load_splat_v16i8_with_unfolded_offset(i8* %p) {
; CHECK-LABEL: load_splat_v16i8_with_unfolded_offset:
; CHECK: .functype load_splat_v16i8_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load8_splat 0
; CHECK-NEXT: # fallthrough-return
  %q = ptrtoint i8* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to i8*
  %e = load i8, i8* %s
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}

define <16 x i8> @load_v16i8_with_unfolded_gep_offset(<16 x i8>* %p) {
; CHECK-LABEL: load_v16i8_with_unfolded_gep_offset:
; CHECK: .functype load_v16i8_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr <16 x i8>, <16 x i8>* %p, i32 1
  %v = load <16 x i8>, <16 x i8>* %s
  ret <16 x i8> %v
}

define <16 x i8> @load_splat_v16i8_with_unfolded_gep_offset(i8* %p) {
; CHECK-LABEL: load_splat_v16i8_with_unfolded_gep_offset:
; CHECK: .functype load_splat_v16i8_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load8_splat 0
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr i8, i8* %p, i32 1
  %e = load i8, i8* %s
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}
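; Loads from constant numeric addresses use a zero base (i32.const 0) and put
; the address itself in the offset immediate.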
define <16 x i8> @load_v16i8_from_numeric_address() {
; CHECK-LABEL: load_v16i8_from_numeric_address:
; CHECK: .functype load_v16i8_from_numeric_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: v128.load 32
; CHECK-NEXT: # fallthrough-return
  %s = inttoptr i32 32 to <16 x i8>*
  %v = load <16 x i8>, <16 x i8>* %s
  ret <16 x i8> %v
}

define <16 x i8> @load_splat_v16i8_from_numeric_address() {
; CHECK-LABEL: load_splat_v16i8_from_numeric_address:
; CHECK: .functype load_splat_v16i8_from_numeric_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: v128.load8_splat 32
; CHECK-NEXT: # fallthrough-return
  %s = inttoptr i32 32 to i8*
  %e = load i8, i8* %s
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}
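; Loads from globals likewise use a zero base, with the symbol itself
; appearing as the offset operand.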
@gv_v16i8 = global <16 x i8> <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
define <16 x i8> @load_v16i8_from_global_address() {
; CHECK-LABEL: load_v16i8_from_global_address:
; CHECK: .functype load_v16i8_from_global_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: v128.load gv_v16i8
; CHECK-NEXT: # fallthrough-return
  %v = load <16 x i8>, <16 x i8>* @gv_v16i8
  ret <16 x i8> %v
}

@gv_i8 = global i8 42
define <16 x i8> @load_splat_v16i8_from_global_address() {
; CHECK-LABEL: load_splat_v16i8_from_global_address:
; CHECK: .functype load_splat_v16i8_from_global_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: v128.load8_splat gv_i8
; CHECK-NEXT: # fallthrough-return
  %e = load i8, i8* @gv_i8
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}
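; The store tests mirror the loads: the address is pushed before the stored
; value, and the same offset-folding rules apply.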
define void @store_v16i8(<16 x i8> %v, <16 x i8>* %p) {
; CHECK-LABEL: store_v16i8:
; CHECK: .functype store_v16i8 (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
  store <16 x i8> %v, <16 x i8>* %p
  ret void
}

define void @store_v16i8_with_folded_offset(<16 x i8> %v, <16 x i8>* %p) {
; CHECK-LABEL: store_v16i8_with_folded_offset:
; CHECK: .functype store_v16i8_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 16
; CHECK-NEXT: # fallthrough-return
  %q = ptrtoint <16 x i8>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <16 x i8>*
  store <16 x i8> %v, <16 x i8>* %s
  ret void
}

define void @store_v16i8_with_folded_gep_offset(<16 x i8> %v, <16 x i8>* %p) {
; CHECK-LABEL: store_v16i8_with_folded_gep_offset:
; CHECK: .functype store_v16i8_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 16
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 1
  store <16 x i8> %v, <16 x i8>* %s
  ret void
}

define void @store_v16i8_with_unfolded_gep_negative_offset(<16 x i8> %v, <16 x i8>* %p) {
; CHECK-LABEL: store_v16i8_with_unfolded_gep_negative_offset:
; CHECK: .functype store_v16i8_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const -16
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 -1
  store <16 x i8> %v, <16 x i8>* %s
  ret void
}

define void @store_v16i8_with_unfolded_offset(<16 x i8> %v, <16 x i8>* %p) {
; CHECK-LABEL: store_v16i8_with_unfolded_offset:
; CHECK: .functype store_v16i8_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
  %q = ptrtoint <16 x i8>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <16 x i8>*
  store <16 x i8> %v, <16 x i8>* %s
  ret void
}

define void @store_v16i8_with_unfolded_gep_offset(<16 x i8> %v, <16 x i8>* %p) {
; CHECK-LABEL: store_v16i8_with_unfolded_gep_offset:
; CHECK: .functype store_v16i8_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr <16 x i8>, <16 x i8>* %p, i32 1
  store <16 x i8> %v, <16 x i8>* %s
  ret void
}

define void @store_v16i8_to_numeric_address(<16 x i8> %v) {
; CHECK-LABEL: store_v16i8_to_numeric_address:
; CHECK: .functype store_v16i8_to_numeric_address (v128) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 32
; CHECK-NEXT: # fallthrough-return
  %s = inttoptr i32 32 to <16 x i8>*
  store <16 x i8> %v, <16 x i8>* %s
  ret void
}

define void @store_v16i8_to_global_address(<16 x i8> %v) {
; CHECK-LABEL: store_v16i8_to_global_address:
; CHECK: .functype store_v16i8_to_global_address (v128) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store gv_v16i8
; CHECK-NEXT: # fallthrough-return
  store <16 x i8> %v, <16 x i8>* @gv_v16i8
  ret void
}
; ==============================================================================
; 8 x i16
; ==============================================================================
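; v8i16 adds extending loads: sext and zext of a loaded <8 x i8> select
; i16x8.load8x8_s and i16x8.load8x8_u, and a plain <8 x i8> load is likewise
; lowered as a zero-extending load.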
define <8 x i16> @load_v8i16(<8 x i16>* %p) {
; CHECK-LABEL: load_v8i16:
; CHECK: .functype load_v8i16 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
  %v = load <8 x i16>, <8 x i16>* %p
  ret <8 x i16> %v
}

define <8 x i16> @load_splat_v8i16(i16* %p) {
; CHECK-LABEL: load_splat_v8i16:
; CHECK: .functype load_splat_v8i16 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load16_splat 0
; CHECK-NEXT: # fallthrough-return
  %e = load i16, i16* %p
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

define <8 x i16> @load_sext_v8i16(<8 x i8>* %p) {
; CHECK-LABEL: load_sext_v8i16:
; CHECK: .functype load_sext_v8i16 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i16x8.load8x8_s 0
; CHECK-NEXT: # fallthrough-return
  %v = load <8 x i8>, <8 x i8>* %p
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i16> @load_zext_v8i16(<8 x i8>* %p) {
; CHECK-LABEL: load_zext_v8i16:
; CHECK: .functype load_zext_v8i16 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i16x8.load8x8_u 0
; CHECK-NEXT: # fallthrough-return
  %v = load <8 x i8>, <8 x i8>* %p
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i8> @load_ext_v8i16(<8 x i8>* %p) {
; CHECK-LABEL: load_ext_v8i16:
; CHECK: .functype load_ext_v8i16 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i16x8.load8x8_u 0
; CHECK-NEXT: # fallthrough-return
  %v = load <8 x i8>, <8 x i8>* %p
  ret <8 x i8> %v
}
define <8 x i16> @load_v8i16_with_folded_offset(<8 x i16>* %p) {
; CHECK-LABEL: load_v8i16_with_folded_offset:
; CHECK: .functype load_v8i16_with_folded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 16
; CHECK-NEXT: # fallthrough-return
  %q = ptrtoint <8 x i16>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i16>*
  %v = load <8 x i16>, <8 x i16>* %s
  ret <8 x i16> %v
}

define <8 x i16> @load_splat_v8i16_with_folded_offset(i16* %p) {
; CHECK-LABEL: load_splat_v8i16_with_folded_offset:
; CHECK: .functype load_splat_v8i16_with_folded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load16_splat 16
; CHECK-NEXT: # fallthrough-return
  %q = ptrtoint i16* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to i16*
  %e = load i16, i16* %s
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

define <8 x i16> @load_sext_v8i16_with_folded_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_sext_v8i16_with_folded_offset:
; CHECK: .functype load_sext_v8i16_with_folded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i16x8.load8x8_s 16
; CHECK-NEXT: # fallthrough-return
  %q = ptrtoint <8 x i8>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i16> @load_zext_v8i16_with_folded_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_zext_v8i16_with_folded_offset:
; CHECK: .functype load_zext_v8i16_with_folded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i16x8.load8x8_u 16
; CHECK-NEXT: # fallthrough-return
  %q = ptrtoint <8 x i8>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i8> @load_ext_v8i16_with_folded_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_ext_v8i16_with_folded_offset:
; CHECK: .functype load_ext_v8i16_with_folded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i16x8.load8x8_u 16
; CHECK-NEXT: # fallthrough-return
  %q = ptrtoint <8 x i8>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  ret <8 x i8> %v
}

define <8 x i16> @load_v8i16_with_folded_gep_offset(<8 x i16>* %p) {
; CHECK-LABEL: load_v8i16_with_folded_gep_offset:
; CHECK: .functype load_v8i16_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 16
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 1
  %v = load <8 x i16>, <8 x i16>* %s
  ret <8 x i16> %v
}

define <8 x i16> @load_splat_v8i16_with_folded_gep_offset(i16* %p) {
; CHECK-LABEL: load_splat_v8i16_with_folded_gep_offset:
; CHECK: .functype load_splat_v8i16_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load16_splat 2
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr inbounds i16, i16* %p, i32 1
  %e = load i16, i16* %s
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

define <8 x i16> @load_sext_v8i16_with_folded_gep_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_sext_v8i16_with_folded_gep_offset:
; CHECK: .functype load_sext_v8i16_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i16x8.load8x8_s 8
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 1
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i16> @load_zext_v8i16_with_folded_gep_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_zext_v8i16_with_folded_gep_offset:
; CHECK: .functype load_zext_v8i16_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i16x8.load8x8_u 8
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 1
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i8> @load_ext_v8i16_with_folded_gep_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_ext_v8i16_with_folded_gep_offset:
; CHECK: .functype load_ext_v8i16_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i16x8.load8x8_u 8
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 1
  %v = load <8 x i8>, <8 x i8>* %s
  ret <8 x i8> %v
}
define <8 x i16> @load_v8i16_with_unfolded_gep_negative_offset(<8 x i16>* %p) {
; CHECK-LABEL: load_v8i16_with_unfolded_gep_negative_offset:
; CHECK: .functype load_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const -16
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 -1
  %v = load <8 x i16>, <8 x i16>* %s
  ret <8 x i16> %v
}

define <8 x i16> @load_splat_v8i16_with_unfolded_gep_negative_offset(i16* %p) {
; CHECK-LABEL: load_splat_v8i16_with_unfolded_gep_negative_offset:
; CHECK: .functype load_splat_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const -2
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load16_splat 0
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr inbounds i16, i16* %p, i32 -1
  %e = load i16, i16* %s
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

define <8 x i16> @load_sext_v8i16_with_unfolded_gep_negative_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_sext_v8i16_with_unfolded_gep_negative_offset:
; CHECK: .functype load_sext_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const -8
; CHECK-NEXT: i32.add
; CHECK-NEXT: i16x8.load8x8_s 0
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 -1
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i16> @load_zext_v8i16_with_unfolded_gep_negative_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_zext_v8i16_with_unfolded_gep_negative_offset:
; CHECK: .functype load_zext_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const -8
; CHECK-NEXT: i32.add
; CHECK-NEXT: i16x8.load8x8_u 0
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 -1
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i8> @load_ext_v8i16_with_unfolded_gep_negative_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_ext_v8i16_with_unfolded_gep_negative_offset:
; CHECK: .functype load_ext_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const -8
; CHECK-NEXT: i32.add
; CHECK-NEXT: i16x8.load8x8_u 0
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 -1
  %v = load <8 x i8>, <8 x i8>* %s
  ret <8 x i8> %v
}

define <8 x i16> @load_v8i16_with_unfolded_offset(<8 x i16>* %p) {
; CHECK-LABEL: load_v8i16_with_unfolded_offset:
; CHECK: .functype load_v8i16_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
  %q = ptrtoint <8 x i16>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i16>*
  %v = load <8 x i16>, <8 x i16>* %s
  ret <8 x i16> %v
}

define <8 x i16> @load_splat_v8i16_with_unfolded_offset(i16* %p) {
; CHECK-LABEL: load_splat_v8i16_with_unfolded_offset:
; CHECK: .functype load_splat_v8i16_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load16_splat 0
; CHECK-NEXT: # fallthrough-return
  %q = ptrtoint i16* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to i16*
  %e = load i16, i16* %s
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

define <8 x i16> @load_sext_v8i16_with_unfolded_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_sext_v8i16_with_unfolded_offset:
; CHECK: .functype load_sext_v8i16_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: i16x8.load8x8_s 0
; CHECK-NEXT: # fallthrough-return
  %q = ptrtoint <8 x i8>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i16> @load_zext_v8i16_with_unfolded_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_zext_v8i16_with_unfolded_offset:
; CHECK: .functype load_zext_v8i16_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: i16x8.load8x8_u 0
; CHECK-NEXT: # fallthrough-return
  %q = ptrtoint <8 x i8>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i8> @load_ext_v8i16_with_unfolded_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_ext_v8i16_with_unfolded_offset:
; CHECK: .functype load_ext_v8i16_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: i16x8.load8x8_u 0
; CHECK-NEXT: # fallthrough-return
  %q = ptrtoint <8 x i8>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  ret <8 x i8> %v
}

define <8 x i16> @load_v8i16_with_unfolded_gep_offset(<8 x i16>* %p) {
; CHECK-LABEL: load_v8i16_with_unfolded_gep_offset:
; CHECK: .functype load_v8i16_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr <8 x i16>, <8 x i16>* %p, i32 1
  %v = load <8 x i16>, <8 x i16>* %s
  ret <8 x i16> %v
}

define <8 x i16> @load_splat_v8i16_with_unfolded_gep_offset(i16* %p) {
; CHECK-LABEL: load_splat_v8i16_with_unfolded_gep_offset:
; CHECK: .functype load_splat_v8i16_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 2
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load16_splat 0
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr i16, i16* %p, i32 1
  %e = load i16, i16* %s
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

define <8 x i16> @load_sext_v8i16_with_unfolded_gep_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_sext_v8i16_with_unfolded_gep_offset:
; CHECK: .functype load_sext_v8i16_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 8
; CHECK-NEXT: i32.add
; CHECK-NEXT: i16x8.load8x8_s 0
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr <8 x i8>, <8 x i8>* %p, i32 1
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i16> @load_zext_v8i16_with_unfolded_gep_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_zext_v8i16_with_unfolded_gep_offset:
; CHECK: .functype load_zext_v8i16_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 8
; CHECK-NEXT: i32.add
; CHECK-NEXT: i16x8.load8x8_u 0
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr <8 x i8>, <8 x i8>* %p, i32 1
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i8> @load_ext_v8i16_with_unfolded_gep_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_ext_v8i16_with_unfolded_gep_offset:
; CHECK: .functype load_ext_v8i16_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 8
; CHECK-NEXT: i32.add
; CHECK-NEXT: i16x8.load8x8_u 0
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr <8 x i8>, <8 x i8>* %p, i32 1
  %v = load <8 x i8>, <8 x i8>* %s
  ret <8 x i8> %v
}
define <8 x i16> @load_v8i16_from_numeric_address() {
; CHECK-LABEL: load_v8i16_from_numeric_address:
; CHECK: .functype load_v8i16_from_numeric_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: v128.load 32
; CHECK-NEXT: # fallthrough-return
  %s = inttoptr i32 32 to <8 x i16>*
  %v = load <8 x i16>, <8 x i16>* %s
  ret <8 x i16> %v
}

define <8 x i16> @load_splat_v8i16_from_numeric_address() {
; CHECK-LABEL: load_splat_v8i16_from_numeric_address:
; CHECK: .functype load_splat_v8i16_from_numeric_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: v128.load16_splat 32
; CHECK-NEXT: # fallthrough-return
  %s = inttoptr i32 32 to i16*
  %e = load i16, i16* %s
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

define <8 x i16> @load_sext_v8i16_from_numeric_address() {
; CHECK-LABEL: load_sext_v8i16_from_numeric_address:
; CHECK: .functype load_sext_v8i16_from_numeric_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: i16x8.load8x8_s 32
; CHECK-NEXT: # fallthrough-return
  %s = inttoptr i32 32 to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i16> @load_zext_v8i16_from_numeric_address() {
; CHECK-LABEL: load_zext_v8i16_from_numeric_address:
; CHECK: .functype load_zext_v8i16_from_numeric_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: i16x8.load8x8_u 32
; CHECK-NEXT: # fallthrough-return
  %s = inttoptr i32 32 to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i8> @load_ext_v8i16_from_numeric_address() {
; CHECK-LABEL: load_ext_v8i16_from_numeric_address:
; CHECK: .functype load_ext_v8i16_from_numeric_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: i16x8.load8x8_u 32
; CHECK-NEXT: # fallthrough-return
  %s = inttoptr i32 32 to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  ret <8 x i8> %v
}
@gv_v8i16 = global <8 x i16> <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>
define <8 x i16> @load_v8i16_from_global_address() {
; CHECK-LABEL: load_v8i16_from_global_address:
; CHECK: .functype load_v8i16_from_global_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: v128.load gv_v8i16
; CHECK-NEXT: # fallthrough-return
  %v = load <8 x i16>, <8 x i16>* @gv_v8i16
  ret <8 x i16> %v
}

@gv_i16 = global i16 42
define <8 x i16> @load_splat_v8i16_from_global_address() {
; CHECK-LABEL: load_splat_v8i16_from_global_address:
; CHECK: .functype load_splat_v8i16_from_global_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: v128.load16_splat gv_i16
; CHECK-NEXT: # fallthrough-return
  %e = load i16, i16* @gv_i16
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

@gv_v8i8 = global <8 x i8> <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
define <8 x i16> @load_sext_v8i16_from_global_address() {
; CHECK-LABEL: load_sext_v8i16_from_global_address:
; CHECK: .functype load_sext_v8i16_from_global_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: i16x8.load8x8_s gv_v8i8
; CHECK-NEXT: # fallthrough-return
  %v = load <8 x i8>, <8 x i8>* @gv_v8i8
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i16> @load_zext_v8i16_from_global_address() {
; CHECK-LABEL: load_zext_v8i16_from_global_address:
; CHECK: .functype load_zext_v8i16_from_global_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: i16x8.load8x8_u gv_v8i8
; CHECK-NEXT: # fallthrough-return
  %v = load <8 x i8>, <8 x i8>* @gv_v8i8
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i8> @load_ext_v8i16_from_global_address() {
; CHECK-LABEL: load_ext_v8i16_from_global_address:
; CHECK: .functype load_ext_v8i16_from_global_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: i16x8.load8x8_u gv_v8i8
; CHECK-NEXT: # fallthrough-return
  %v = load <8 x i8>, <8 x i8>* @gv_v8i8
  ret <8 x i8> %v
}
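; Storing <8 x i8> has no single narrowing instruction; it is lowered as a
; mask, i8x16.narrow_i16x8_u, i64x2.extract_lane 0, and a 64-bit scalar store.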
define void @store_v8i16(<8 x i16> %v, <8 x i16>* %p) {
; CHECK-LABEL: store_v8i16:
; CHECK: .functype store_v8i16 (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
  store <8 x i16> %v, <8 x i16>* %p
  ret void
}

define void @store_narrowing_v8i16(<8 x i8> %v, <8 x i8>* %p) {
; CHECK-LABEL: store_narrowing_v8i16:
; CHECK: .functype store_narrowing_v8i16 (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: v128.const 255, 255, 255, 255, 255, 255, 255, 255
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.and
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.narrow_i16x8_u
; CHECK-NEXT: i64x2.extract_lane 0
; CHECK-NEXT: i64.store 0
; CHECK-NEXT: # fallthrough-return
  store <8 x i8> %v, <8 x i8>* %p
  ret void
}

define void @store_v8i16_with_folded_offset(<8 x i16> %v, <8 x i16>* %p) {
; CHECK-LABEL: store_v8i16_with_folded_offset:
; CHECK: .functype store_v8i16_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 16
; CHECK-NEXT: # fallthrough-return
  %q = ptrtoint <8 x i16>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i16>*
  store <8 x i16> %v, <8 x i16>* %s
  ret void
}

define void @store_narrowing_v8i16_with_folded_offset(<8 x i8> %v, <8 x i8>* %p) {
; CHECK-LABEL: store_narrowing_v8i16_with_folded_offset:
; CHECK: .functype store_narrowing_v8i16_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: v128.const 255, 255, 255, 255, 255, 255, 255, 255
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.and
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.narrow_i16x8_u
; CHECK-NEXT: i64x2.extract_lane 0
; CHECK-NEXT: i64.store 16
; CHECK-NEXT: # fallthrough-return
  %q = ptrtoint <8 x i8>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i8>*
  store <8 x i8> %v, <8 x i8>* %s
  ret void
}

define void @store_v8i16_with_folded_gep_offset(<8 x i16> %v, <8 x i16>* %p) {
; CHECK-LABEL: store_v8i16_with_folded_gep_offset:
; CHECK: .functype store_v8i16_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 16
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 1
  store <8 x i16> %v, <8 x i16>* %s
  ret void
}

define void @store_narrowing_v8i16_with_folded_gep_offset(<8 x i8> %v, <8 x i8>* %p) {
; CHECK-LABEL: store_narrowing_v8i16_with_folded_gep_offset:
; CHECK: .functype store_narrowing_v8i16_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: v128.const 255, 255, 255, 255, 255, 255, 255, 255
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.and
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.narrow_i16x8_u
; CHECK-NEXT: i64x2.extract_lane 0
; CHECK-NEXT: i64.store 8
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 1
  store <8 x i8> %v, <8 x i8>* %s
  ret void
}

define void @store_v8i16_with_unfolded_gep_negative_offset(<8 x i16> %v, <8 x i16>* %p) {
; CHECK-LABEL: store_v8i16_with_unfolded_gep_negative_offset:
; CHECK: .functype store_v8i16_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const -16
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 -1
  store <8 x i16> %v, <8 x i16>* %s
  ret void
}

define void @store_narrowing_v8i16_with_unfolded_gep_negative_offset(<8 x i8> %v, <8 x i8>* %p) {
; CHECK-LABEL: store_narrowing_v8i16_with_unfolded_gep_negative_offset:
; CHECK: .functype store_narrowing_v8i16_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const -8
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.const 255, 255, 255, 255, 255, 255, 255, 255
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.and
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.narrow_i16x8_u
; CHECK-NEXT: i64x2.extract_lane 0
; CHECK-NEXT: i64.store 0
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 -1
  store <8 x i8> %v, <8 x i8>* %s
  ret void
}

define void @store_v8i16_with_unfolded_offset(<8 x i16> %v, <8 x i16>* %p) {
; CHECK-LABEL: store_v8i16_with_unfolded_offset:
; CHECK: .functype store_v8i16_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
  %q = ptrtoint <8 x i16>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i16>*
  store <8 x i16> %v, <8 x i16>* %s
  ret void
}

define void @store_narrowing_v8i16_with_unfolded_offset(<8 x i8> %v, <8 x i8>* %p) {
; CHECK-LABEL: store_narrowing_v8i16_with_unfolded_offset:
; CHECK: .functype store_narrowing_v8i16_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.const 255, 255, 255, 255, 255, 255, 255, 255
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.and
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.narrow_i16x8_u
; CHECK-NEXT: i64x2.extract_lane 0
; CHECK-NEXT: i64.store 0
; CHECK-NEXT: # fallthrough-return
  %q = ptrtoint <8 x i8>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i8>*
  store <8 x i8> %v, <8 x i8>* %s
  ret void
}

define void @store_v8i16_with_unfolded_gep_offset(<8 x i16> %v, <8 x i16>* %p) {
; CHECK-LABEL: store_v8i16_with_unfolded_gep_offset:
; CHECK: .functype store_v8i16_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr <8 x i16>, <8 x i16>* %p, i32 1
  store <8 x i16> %v, <8 x i16>* %s
  ret void
}

define void @store_narrowing_v8i16_with_unfolded_gep_offset(<8 x i8> %v, <8 x i8>* %p) {
; CHECK-LABEL: store_narrowing_v8i16_with_unfolded_gep_offset:
; CHECK: .functype store_narrowing_v8i16_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const 8
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.const 255, 255, 255, 255, 255, 255, 255, 255
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.and
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.narrow_i16x8_u
; CHECK-NEXT: i64x2.extract_lane 0
; CHECK-NEXT: i64.store 0
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr <8 x i8>, <8 x i8>* %p, i32 1
  store <8 x i8> %v, <8 x i8>* %s
  ret void
}

define void @store_v8i16_to_numeric_address(<8 x i16> %v) {
; CHECK-LABEL: store_v8i16_to_numeric_address:
; CHECK: .functype store_v8i16_to_numeric_address (v128) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 32
; CHECK-NEXT: # fallthrough-return
  %s = inttoptr i32 32 to <8 x i16>*
  store <8 x i16> %v, <8 x i16>* %s
  ret void
}

define void @store_narrowing_v8i16_to_numeric_address(<8 x i8> %v, <8 x i8>* %p) {
; CHECK-LABEL: store_narrowing_v8i16_to_numeric_address:
; CHECK: .functype store_narrowing_v8i16_to_numeric_address (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: v128.const 255, 255, 255, 255, 255, 255, 255, 255
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.and
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.narrow_i16x8_u
; CHECK-NEXT: i64x2.extract_lane 0
; CHECK-NEXT: i64.store 32
; CHECK-NEXT: # fallthrough-return
  %s = inttoptr i32 32 to <8 x i8>*
  store <8 x i8> %v, <8 x i8>* %s
  ret void
}

define void @store_v8i16_to_global_address(<8 x i16> %v) {
; CHECK-LABEL: store_v8i16_to_global_address:
; CHECK: .functype store_v8i16_to_global_address (v128) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store gv_v8i16
; CHECK-NEXT: # fallthrough-return
  store <8 x i16> %v, <8 x i16>* @gv_v8i16
  ret void
}

define void @store_narrowing_v8i16_to_global_address(<8 x i8> %v) {
; CHECK-LABEL: store_narrowing_v8i16_to_global_address:
; CHECK: .functype store_narrowing_v8i16_to_global_address (v128) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: v128.const 255, 255, 255, 255, 255, 255, 255, 255
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.and
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.narrow_i16x8_u
; CHECK-NEXT: i64x2.extract_lane 0
; CHECK-NEXT: i64.store gv_v8i8
; CHECK-NEXT: # fallthrough-return
  store <8 x i8> %v, <8 x i8>* @gv_v8i8
  ret void
}
; ==============================================================================
; 4 x i32
; ==============================================================================
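; The v4i32 tests repeat the same patterns with v128.load32_splat and the
; i32x4.load16x4_s/u extending loads.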
define <4 x i32> @load_v4i32(<4 x i32>* %p) {
; CHECK-LABEL: load_v4i32:
; CHECK: .functype load_v4i32 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
  %v = load <4 x i32>, <4 x i32>* %p
  ret <4 x i32> %v
}

define <4 x i32> @load_splat_v4i32(i32* %addr) {
; CHECK-LABEL: load_splat_v4i32:
; CHECK: .functype load_splat_v4i32 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load32_splat 0
; CHECK-NEXT: # fallthrough-return
  %e = load i32, i32* %addr, align 4
  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %v2
}

define <4 x i32> @load_sext_v4i32(<4 x i16>* %p) {
; CHECK-LABEL: load_sext_v4i32:
; CHECK: .functype load_sext_v4i32 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32x4.load16x4_s 0
; CHECK-NEXT: # fallthrough-return
  %v = load <4 x i16>, <4 x i16>* %p
  %v2 = sext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

define <4 x i32> @load_zext_v4i32(<4 x i16>* %p) {
; CHECK-LABEL: load_zext_v4i32:
; CHECK: .functype load_zext_v4i32 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32x4.load16x4_u 0
; CHECK-NEXT: # fallthrough-return
  %v = load <4 x i16>, <4 x i16>* %p
  %v2 = zext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

define <4 x i16> @load_ext_v4i32(<4 x i16>* %p) {
; CHECK-LABEL: load_ext_v4i32:
; CHECK: .functype load_ext_v4i32 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32x4.load16x4_u 0
; CHECK-NEXT: # fallthrough-return
  %v = load <4 x i16>, <4 x i16>* %p
  ret <4 x i16> %v
}

define <4 x i32> @load_v4i32_with_folded_offset(<4 x i32>* %p) {
; CHECK-LABEL: load_v4i32_with_folded_offset:
; CHECK: .functype load_v4i32_with_folded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 16
; CHECK-NEXT: # fallthrough-return
  %q = ptrtoint <4 x i32>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <4 x i32>*
  %v = load <4 x i32>, <4 x i32>* %s
  ret <4 x i32> %v
}

define <4 x i32> @load_splat_v4i32_with_folded_offset(i32* %p) {
; CHECK-LABEL: load_splat_v4i32_with_folded_offset:
; CHECK: .functype load_splat_v4i32_with_folded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load32_splat 16
; CHECK-NEXT: # fallthrough-return
  %q = ptrtoint i32* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to i32*
  %e = load i32, i32* %s
  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %v2
}

define <4 x i32> @load_sext_v4i32_with_folded_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_sext_v4i32_with_folded_offset:
; CHECK: .functype load_sext_v4i32_with_folded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32x4.load16x4_s 16
; CHECK-NEXT: # fallthrough-return
  %q = ptrtoint <4 x i16>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <4 x i16>*
  %v = load <4 x i16>, <4 x i16>* %s
  %v2 = sext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

define <4 x i32> @load_zext_v4i32_with_folded_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_zext_v4i32_with_folded_offset:
; CHECK: .functype load_zext_v4i32_with_folded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32x4.load16x4_u 16
; CHECK-NEXT: # fallthrough-return
  %q = ptrtoint <4 x i16>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <4 x i16>*
  %v = load <4 x i16>, <4 x i16>* %s
  %v2 = zext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

define <4 x i16> @load_ext_v4i32_with_folded_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_ext_v4i32_with_folded_offset:
; CHECK: .functype load_ext_v4i32_with_folded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32x4.load16x4_u 16
; CHECK-NEXT: # fallthrough-return
  %q = ptrtoint <4 x i16>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <4 x i16>*
  %v = load <4 x i16>, <4 x i16>* %s
  ret <4 x i16> %v
}
define <4 x i32> @load_v4i32_with_folded_gep_offset(<4 x i32>* %p) {
; CHECK-LABEL: load_v4i32_with_folded_gep_offset:
; CHECK: .functype load_v4i32_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 16
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 1
  %v = load <4 x i32>, <4 x i32>* %s
  ret <4 x i32> %v
}

define <4 x i32> @load_splat_v4i32_with_folded_gep_offset(i32* %p) {
; CHECK-LABEL: load_splat_v4i32_with_folded_gep_offset:
; CHECK: .functype load_splat_v4i32_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load32_splat 4
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr inbounds i32, i32* %p, i32 1
  %e = load i32, i32* %s
  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %v2
}

define <4 x i32> @load_sext_v4i32_with_folded_gep_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_sext_v4i32_with_folded_gep_offset:
; CHECK: .functype load_sext_v4i32_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32x4.load16x4_s 8
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 1
  %v = load <4 x i16>, <4 x i16>* %s
  %v2 = sext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

define <4 x i32> @load_zext_v4i32_with_folded_gep_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_zext_v4i32_with_folded_gep_offset:
; CHECK: .functype load_zext_v4i32_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32x4.load16x4_u 8
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 1
  %v = load <4 x i16>, <4 x i16>* %s
  %v2 = zext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

define <4 x i16> @load_ext_v4i32_with_folded_gep_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_ext_v4i32_with_folded_gep_offset:
; CHECK: .functype load_ext_v4i32_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32x4.load16x4_u 8
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 1
  %v = load <4 x i16>, <4 x i16>* %s
  ret <4 x i16> %v
}

define <4 x i32> @load_v4i32_with_unfolded_gep_negative_offset(<4 x i32>* %p) {
; CHECK-LABEL: load_v4i32_with_unfolded_gep_negative_offset:
; CHECK: .functype load_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const -16
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 -1
  %v = load <4 x i32>, <4 x i32>* %s
  ret <4 x i32> %v
}

define <4 x i32> @load_splat_v4i32_with_unfolded_gep_negative_offset(i32* %p) {
; CHECK-LABEL: load_splat_v4i32_with_unfolded_gep_negative_offset:
; CHECK: .functype load_splat_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const -4
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load32_splat 0
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr inbounds i32, i32* %p, i32 -1
  %e = load i32, i32* %s
  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %v2
}

define <4 x i32> @load_sext_v4i32_with_unfolded_gep_negative_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_sext_v4i32_with_unfolded_gep_negative_offset:
; CHECK: .functype load_sext_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const -8
; CHECK-NEXT: i32.add
; CHECK-NEXT: i32x4.load16x4_s 0
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 -1
  %v = load <4 x i16>, <4 x i16>* %s
  %v2 = sext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

define <4 x i32> @load_zext_v4i32_with_unfolded_gep_negative_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_zext_v4i32_with_unfolded_gep_negative_offset:
; CHECK: .functype load_zext_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const -8
; CHECK-NEXT: i32.add
; CHECK-NEXT: i32x4.load16x4_u 0
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 -1
  %v = load <4 x i16>, <4 x i16>* %s
  %v2 = zext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

define <4 x i16> @load_ext_v4i32_with_unfolded_gep_negative_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_ext_v4i32_with_unfolded_gep_negative_offset:
; CHECK: .functype load_ext_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const -8
; CHECK-NEXT: i32.add
; CHECK-NEXT: i32x4.load16x4_u 0
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 -1
  %v = load <4 x i16>, <4 x i16>* %s
  ret <4 x i16> %v
}
define <4 x i32> @load_v4i32_with_unfolded_offset(<4 x i32>* %p) {
; CHECK-LABEL: load_v4i32_with_unfolded_offset:
; CHECK: .functype load_v4i32_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
  %q = ptrtoint <4 x i32>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <4 x i32>*
  %v = load <4 x i32>, <4 x i32>* %s
  ret <4 x i32> %v
}

define <4 x i32> @load_splat_v4i32_with_unfolded_offset(i32* %p) {
; CHECK-LABEL: load_splat_v4i32_with_unfolded_offset:
; CHECK: .functype load_splat_v4i32_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load32_splat 0
; CHECK-NEXT: # fallthrough-return
  %q = ptrtoint i32* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to i32*
  %e = load i32, i32* %s
  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %v2
}

define <4 x i32> @load_sext_v4i32_with_unfolded_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_sext_v4i32_with_unfolded_offset:
; CHECK: .functype load_sext_v4i32_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: i32x4.load16x4_s 0
; CHECK-NEXT: # fallthrough-return
  %q = ptrtoint <4 x i16>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <4 x i16>*
  %v = load <4 x i16>, <4 x i16>* %s
  %v2 = sext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

define <4 x i32> @load_zext_v4i32_with_unfolded_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_zext_v4i32_with_unfolded_offset:
; CHECK: .functype load_zext_v4i32_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: i32x4.load16x4_u 0
; CHECK-NEXT: # fallthrough-return
  %q = ptrtoint <4 x i16>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <4 x i16>*
  %v = load <4 x i16>, <4 x i16>* %s
  %v2 = zext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

define <4 x i16> @load_ext_v4i32_with_unfolded_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_ext_v4i32_with_unfolded_offset:
; CHECK: .functype load_ext_v4i32_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: i32x4.load16x4_u 0
; CHECK-NEXT: # fallthrough-return
  %q = ptrtoint <4 x i16>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <4 x i16>*
  %v = load <4 x i16>, <4 x i16>* %s
  ret <4 x i16> %v
}

define <4 x i32> @load_v4i32_with_unfolded_gep_offset(<4 x i32>* %p) {
; CHECK-LABEL: load_v4i32_with_unfolded_gep_offset:
; CHECK: .functype load_v4i32_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr <4 x i32>, <4 x i32>* %p, i32 1
  %v = load <4 x i32>, <4 x i32>* %s
  ret <4 x i32> %v
}

define <4 x i32> @load_splat_v4i32_with_unfolded_gep_offset(i32* %p) {
; CHECK-LABEL: load_splat_v4i32_with_unfolded_gep_offset:
; CHECK: .functype load_splat_v4i32_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 4
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load32_splat 0
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr i32, i32* %p, i32 1
  %e = load i32, i32* %s
  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %v2
}

define <4 x i32> @load_sext_v4i32_with_unfolded_gep_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_sext_v4i32_with_unfolded_gep_offset:
; CHECK: .functype load_sext_v4i32_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 8
; CHECK-NEXT: i32.add
; CHECK-NEXT: i32x4.load16x4_s 0
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr <4 x i16>, <4 x i16>* %p, i32 1
  %v = load <4 x i16>, <4 x i16>* %s
  %v2 = sext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

define <4 x i32> @load_zext_v4i32_with_unfolded_gep_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_zext_v4i32_with_unfolded_gep_offset:
; CHECK: .functype load_zext_v4i32_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 8
; CHECK-NEXT: i32.add
; CHECK-NEXT: i32x4.load16x4_u 0
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr <4 x i16>, <4 x i16>* %p, i32 1
  %v = load <4 x i16>, <4 x i16>* %s
  %v2 = zext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}
define <4 x i16> @load_ext_v4i32_with_unfolded_gep_offset(<4 x i16>* %p) {
|
|
; CHECK-LABEL: load_ext_v4i32_with_unfolded_gep_offset:
|
|
; CHECK: .functype load_ext_v4i32_with_unfolded_gep_offset (i32) -> (v128)
|
|
; CHECK-NEXT: # %bb.0:
|
|
; CHECK-NEXT: local.get 0
|
|
; CHECK-NEXT: i32.const 8
|
|
; CHECK-NEXT: i32.add
|
|
; CHECK-NEXT: i32x4.load16x4_u 0
|
|
; CHECK-NEXT: # fallthrough-return
|
|
%s = getelementptr <4 x i16>, <4 x i16>* %p, i32 1
|
|
%v = load <4 x i16>, <4 x i16>* %s
|
|
ret <4 x i16> %v
|
|
}
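
; Loads from constant addresses: the address itself becomes the offset
; immediate, applied to an i32.const 0 base.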

define <4 x i32> @load_v4i32_from_numeric_address() {
; CHECK-LABEL: load_v4i32_from_numeric_address:
; CHECK: .functype load_v4i32_from_numeric_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: v128.load 32
; CHECK-NEXT: # fallthrough-return
%s = inttoptr i32 32 to <4 x i32>*
%v = load <4 x i32>, <4 x i32>* %s
ret <4 x i32> %v
}

define <4 x i32> @load_splat_v4i32_from_numeric_address() {
; CHECK-LABEL: load_splat_v4i32_from_numeric_address:
; CHECK: .functype load_splat_v4i32_from_numeric_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: v128.load32_splat 32
; CHECK-NEXT: # fallthrough-return
%s = inttoptr i32 32 to i32*
%e = load i32, i32* %s
%v1 = insertelement <4 x i32> undef, i32 %e, i32 0
%v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
ret <4 x i32> %v2
}

define <4 x i32> @load_sext_v4i32_from_numeric_address() {
; CHECK-LABEL: load_sext_v4i32_from_numeric_address:
; CHECK: .functype load_sext_v4i32_from_numeric_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: i32x4.load16x4_s 32
; CHECK-NEXT: # fallthrough-return
%s = inttoptr i32 32 to <4 x i16>*
%v = load <4 x i16>, <4 x i16>* %s
%v2 = sext <4 x i16> %v to <4 x i32>
ret <4 x i32> %v2
}

define <4 x i32> @load_zext_v4i32_from_numeric_address() {
; CHECK-LABEL: load_zext_v4i32_from_numeric_address:
; CHECK: .functype load_zext_v4i32_from_numeric_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: i32x4.load16x4_u 32
; CHECK-NEXT: # fallthrough-return
%s = inttoptr i32 32 to <4 x i16>*
%v = load <4 x i16>, <4 x i16>* %s
%v2 = zext <4 x i16> %v to <4 x i32>
ret <4 x i32> %v2
}

define <4 x i16> @load_ext_v4i32_from_numeric_address() {
; CHECK-LABEL: load_ext_v4i32_from_numeric_address:
; CHECK: .functype load_ext_v4i32_from_numeric_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: i32x4.load16x4_u 32
; CHECK-NEXT: # fallthrough-return
%s = inttoptr i32 32 to <4 x i16>*
%v = load <4 x i16>, <4 x i16>* %s
ret <4 x i16> %v
}
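
; Loads from global addresses: the symbol is folded into the offset field,
; again applied to an i32.const 0 base.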

@gv_v4i32 = global <4 x i32> <i32 42, i32 42, i32 42, i32 42>
define <4 x i32> @load_v4i32_from_global_address() {
; CHECK-LABEL: load_v4i32_from_global_address:
; CHECK: .functype load_v4i32_from_global_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: v128.load gv_v4i32
; CHECK-NEXT: # fallthrough-return
%v = load <4 x i32>, <4 x i32>* @gv_v4i32
ret <4 x i32> %v
}

@gv_i32 = global i32 42
define <4 x i32> @load_splat_v4i32_from_global_address() {
; CHECK-LABEL: load_splat_v4i32_from_global_address:
; CHECK: .functype load_splat_v4i32_from_global_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: v128.load32_splat gv_i32
; CHECK-NEXT: # fallthrough-return
%e = load i32, i32* @gv_i32
%v1 = insertelement <4 x i32> undef, i32 %e, i32 0
%v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
ret <4 x i32> %v2
}

@gv_v4i16 = global <4 x i16> <i16 42, i16 42, i16 42, i16 42>
define <4 x i32> @load_sext_v4i32_from_global_address() {
; CHECK-LABEL: load_sext_v4i32_from_global_address:
; CHECK: .functype load_sext_v4i32_from_global_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: i32x4.load16x4_s gv_v4i16
; CHECK-NEXT: # fallthrough-return
%v = load <4 x i16>, <4 x i16>* @gv_v4i16
%v2 = sext <4 x i16> %v to <4 x i32>
ret <4 x i32> %v2
}

define <4 x i32> @load_zext_v4i32_from_global_address() {
; CHECK-LABEL: load_zext_v4i32_from_global_address:
; CHECK: .functype load_zext_v4i32_from_global_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: i32x4.load16x4_u gv_v4i16
; CHECK-NEXT: # fallthrough-return
%v = load <4 x i16>, <4 x i16>* @gv_v4i16
%v2 = zext <4 x i16> %v to <4 x i32>
ret <4 x i32> %v2
}

define <4 x i16> @load_ext_v4i32_from_global_address() {
; CHECK-LABEL: load_ext_v4i32_from_global_address:
; CHECK: .functype load_ext_v4i32_from_global_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: i32x4.load16x4_u gv_v4i16
; CHECK-NEXT: # fallthrough-return
%v = load <4 x i16>, <4 x i16>* @gv_v4i16
ret <4 x i16> %v
}

define void @store_v4i32(<4 x i32> %v, <4 x i32>* %p) {
; CHECK-LABEL: store_v4i32:
; CHECK: .functype store_v4i32 (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
store <4 x i32> %v , <4 x i32>* %p
ret void
}

define void @store_narrowing_v4i32(<4 x i16> %v, <4 x i16>* %p) {
; CHECK-LABEL: store_narrowing_v4i32:
; CHECK: .functype store_narrowing_v4i32 (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.and
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i16x8.narrow_i32x4_u
; CHECK-NEXT: i64x2.extract_lane 0
; CHECK-NEXT: i64.store 0
; CHECK-NEXT: # fallthrough-return
store <4 x i16> %v , <4 x i16>* %p
ret void
}
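
; There is no single wasm instruction for a <4 x i16> store, so it is lowered
; by masking each i32 lane to 16 bits, narrowing with i16x8.narrow_i32x4_u
; (the mask keeps the unsigned-saturating narrow from clamping the lanes),
; then extracting the low 64 bits of the result and storing them with
; i64.store.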

define void @store_v4i32_with_folded_offset(<4 x i32> %v, <4 x i32>* %p) {
; CHECK-LABEL: store_v4i32_with_folded_offset:
; CHECK: .functype store_v4i32_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 16
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint <4 x i32>* %p to i32
%r = add nuw i32 %q, 16
%s = inttoptr i32 %r to <4 x i32>*
store <4 x i32> %v , <4 x i32>* %s
ret void
}

define void @store_narrowing_v4i32_with_folded_offset(<4 x i16> %v, <4 x i16>* %p) {
; CHECK-LABEL: store_narrowing_v4i32_with_folded_offset:
; CHECK: .functype store_narrowing_v4i32_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.and
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i16x8.narrow_i32x4_u
; CHECK-NEXT: i64x2.extract_lane 0
; CHECK-NEXT: i64.store 16
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint <4 x i16>* %p to i32
%r = add nuw i32 %q, 16
%s = inttoptr i32 %r to <4 x i16>*
store <4 x i16> %v , <4 x i16>* %s
ret void
}

define void @store_v4i32_with_folded_gep_offset(<4 x i32> %v, <4 x i32>* %p) {
; CHECK-LABEL: store_v4i32_with_folded_gep_offset:
; CHECK: .functype store_v4i32_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 16
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 1
store <4 x i32> %v , <4 x i32>* %s
ret void
}

define void @store_narrowing_v4i32_with_folded_gep_offset(<4 x i16> %v, <4 x i16>* %p) {
; CHECK-LABEL: store_narrowing_v4i32_with_folded_gep_offset:
; CHECK: .functype store_narrowing_v4i32_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.and
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i16x8.narrow_i32x4_u
; CHECK-NEXT: i64x2.extract_lane 0
; CHECK-NEXT: i64.store 8
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 1
store <4 x i16> %v , <4 x i16>* %s
ret void
}

define void @store_v4i32_with_unfolded_gep_negative_offset(<4 x i32> %v, <4 x i32>* %p) {
; CHECK-LABEL: store_v4i32_with_unfolded_gep_negative_offset:
; CHECK: .functype store_v4i32_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const -16
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 -1
store <4 x i32> %v , <4 x i32>* %s
ret void
}

define void @store_narrowing_v4i32_with_unfolded_gep_negative_offset(<4 x i16> %v, <4 x i16>* %p) {
; CHECK-LABEL: store_narrowing_v4i32_with_unfolded_gep_negative_offset:
; CHECK: .functype store_narrowing_v4i32_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const -8
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.and
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i16x8.narrow_i32x4_u
; CHECK-NEXT: i64x2.extract_lane 0
; CHECK-NEXT: i64.store 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 -1
store <4 x i16> %v , <4 x i16>* %s
ret void
}

define void @store_v4i32_with_unfolded_offset(<4 x i32> %v, <4 x i32>* %p) {
; CHECK-LABEL: store_v4i32_with_unfolded_offset:
; CHECK: .functype store_v4i32_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint <4 x i32>* %p to i32
%r = add nsw i32 %q, 16
%s = inttoptr i32 %r to <4 x i32>*
store <4 x i32> %v , <4 x i32>* %s
ret void
}

define void @store_narrowing_v4i32_with_unfolded_offset(<4 x i16> %v, <4 x i16>* %p) {
; CHECK-LABEL: store_narrowing_v4i32_with_unfolded_offset:
; CHECK: .functype store_narrowing_v4i32_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.and
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i16x8.narrow_i32x4_u
; CHECK-NEXT: i64x2.extract_lane 0
; CHECK-NEXT: i64.store 0
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint <4 x i16>* %p to i32
%r = add nsw i32 %q, 16
%s = inttoptr i32 %r to <4 x i16>*
store <4 x i16> %v , <4 x i16>* %s
ret void
}

define void @store_v4i32_with_unfolded_gep_offset(<4 x i32> %v, <4 x i32>* %p) {
; CHECK-LABEL: store_v4i32_with_unfolded_gep_offset:
; CHECK: .functype store_v4i32_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr <4 x i32>, <4 x i32>* %p, i32 1
store <4 x i32> %v , <4 x i32>* %s
ret void
}

define void @store_narrowing_v4i32_with_unfolded_gep_offset(<4 x i16> %v, <4 x i16>* %p) {
; CHECK-LABEL: store_narrowing_v4i32_with_unfolded_gep_offset:
; CHECK: .functype store_narrowing_v4i32_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const 8
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.and
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i16x8.narrow_i32x4_u
; CHECK-NEXT: i64x2.extract_lane 0
; CHECK-NEXT: i64.store 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr <4 x i16>, <4 x i16>* %p, i32 1
store <4 x i16> %v , <4 x i16>* %s
ret void
}

define void @store_v4i32_to_numeric_address(<4 x i32> %v) {
; CHECK-LABEL: store_v4i32_to_numeric_address:
; CHECK: .functype store_v4i32_to_numeric_address (v128) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 32
; CHECK-NEXT: # fallthrough-return
%s = inttoptr i32 32 to <4 x i32>*
store <4 x i32> %v , <4 x i32>* %s
ret void
}

define void @store_narrowing_v4i32_to_numeric_address(<4 x i16> %v) {
; CHECK-LABEL: store_narrowing_v4i32_to_numeric_address:
; CHECK: .functype store_narrowing_v4i32_to_numeric_address (v128) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.and
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i16x8.narrow_i32x4_u
; CHECK-NEXT: i64x2.extract_lane 0
; CHECK-NEXT: i64.store 32
; CHECK-NEXT: # fallthrough-return
%s = inttoptr i32 32 to <4 x i16>*
store <4 x i16> %v , <4 x i16>* %s
ret void
}

define void @store_v4i32_to_global_address(<4 x i32> %v) {
; CHECK-LABEL: store_v4i32_to_global_address:
; CHECK: .functype store_v4i32_to_global_address (v128) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store gv_v4i32
; CHECK-NEXT: # fallthrough-return
store <4 x i32> %v , <4 x i32>* @gv_v4i32
ret void
}

define void @store_narrowing_v4i32_to_global_address(<4 x i16> %v) {
; CHECK-LABEL: store_narrowing_v4i32_to_global_address:
; CHECK: .functype store_narrowing_v4i32_to_global_address (v128) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.and
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i16x8.narrow_i32x4_u
; CHECK-NEXT: i64x2.extract_lane 0
; CHECK-NEXT: i64.store gv_v4i16
; CHECK-NEXT: # fallthrough-return
store <4 x i16> %v , <4 x i16>* @gv_v4i16
ret void
}

; ==============================================================================
; 2 x i64
; ==============================================================================

define <2 x i64> @load_v2i64(<2 x i64>* %p) {
; CHECK-LABEL: load_v2i64:
; CHECK: .functype load_v2i64 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
%v = load <2 x i64>, <2 x i64>* %p
ret <2 x i64> %v
}

define <2 x i64> @load_splat_v2i64(i64* %p) {
; CHECK-LABEL: load_splat_v2i64:
; CHECK: .functype load_splat_v2i64 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load64_splat 0
; CHECK-NEXT: # fallthrough-return
%e = load i64, i64* %p
%v1 = insertelement <2 x i64> undef, i64 %e, i32 0
%v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
ret <2 x i64> %v2
}

define <2 x i64> @load_sext_v2i64(<2 x i32>* %p) {
; CHECK-LABEL: load_sext_v2i64:
; CHECK: .functype load_sext_v2i64 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i64x2.load32x2_s 0
; CHECK-NEXT: # fallthrough-return
%v = load <2 x i32>, <2 x i32>* %p
%v2 = sext <2 x i32> %v to <2 x i64>
ret <2 x i64> %v2
}

define <2 x i64> @load_zext_v2i64(<2 x i32>* %p) {
; CHECK-LABEL: load_zext_v2i64:
; CHECK: .functype load_zext_v2i64 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i64x2.load32x2_u 0
; CHECK-NEXT: # fallthrough-return
%v = load <2 x i32>, <2 x i32>* %p
%v2 = zext <2 x i32> %v to <2 x i64>
ret <2 x i64> %v2
}

define <2 x i32> @load_ext_v2i64(<2 x i32>* %p) {
; CHECK-LABEL: load_ext_v2i64:
; CHECK: .functype load_ext_v2i64 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i64x2.load32x2_u 0
; CHECK-NEXT: # fallthrough-return
%v = load <2 x i32>, <2 x i32>* %p
ret <2 x i32> %v
}
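
; The load_ext tests load an illegal <2 x i32> vector directly; it is widened
; to a v128 value, so the same zero-extending i64x2.load32x2_u used for the
; zext case is selected.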

define <2 x i64> @load_v2i64_with_folded_offset(<2 x i64>* %p) {
; CHECK-LABEL: load_v2i64_with_folded_offset:
; CHECK: .functype load_v2i64_with_folded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 16
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint <2 x i64>* %p to i32
%r = add nuw i32 %q, 16
%s = inttoptr i32 %r to <2 x i64>*
%v = load <2 x i64>, <2 x i64>* %s
ret <2 x i64> %v
}

define <2 x i64> @load_splat_v2i64_with_folded_offset(i64* %p) {
; CHECK-LABEL: load_splat_v2i64_with_folded_offset:
; CHECK: .functype load_splat_v2i64_with_folded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load64_splat 16
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint i64* %p to i32
%r = add nuw i32 %q, 16
%s = inttoptr i32 %r to i64*
%e = load i64, i64* %s
%v1 = insertelement <2 x i64> undef, i64 %e, i32 0
%v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
ret <2 x i64> %v2
}

define <2 x i64> @load_sext_v2i64_with_folded_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_sext_v2i64_with_folded_offset:
; CHECK: .functype load_sext_v2i64_with_folded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i64x2.load32x2_s 16
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint <2 x i32>* %p to i32
%r = add nuw i32 %q, 16
%s = inttoptr i32 %r to <2 x i32>*
%v = load <2 x i32>, <2 x i32>* %s
%v2 = sext <2 x i32> %v to <2 x i64>
ret <2 x i64> %v2
}

define <2 x i64> @load_zext_v2i64_with_folded_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_zext_v2i64_with_folded_offset:
; CHECK: .functype load_zext_v2i64_with_folded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i64x2.load32x2_u 16
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint <2 x i32>* %p to i32
%r = add nuw i32 %q, 16
%s = inttoptr i32 %r to <2 x i32>*
%v = load <2 x i32>, <2 x i32>* %s
%v2 = zext <2 x i32> %v to <2 x i64>
ret <2 x i64> %v2
}

define <2 x i32> @load_ext_v2i64_with_folded_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_ext_v2i64_with_folded_offset:
; CHECK: .functype load_ext_v2i64_with_folded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i64x2.load32x2_u 16
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint <2 x i32>* %p to i32
%r = add nuw i32 %q, 16
%s = inttoptr i32 %r to <2 x i32>*
%v = load <2 x i32>, <2 x i32>* %s
ret <2 x i32> %v
}

define <2 x i64> @load_v2i64_with_folded_gep_offset(<2 x i64>* %p) {
; CHECK-LABEL: load_v2i64_with_folded_gep_offset:
; CHECK: .functype load_v2i64_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 16
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 1
%v = load <2 x i64>, <2 x i64>* %s
ret <2 x i64> %v
}

define <2 x i64> @load_splat_v2i64_with_folded_gep_offset(i64* %p) {
; CHECK-LABEL: load_splat_v2i64_with_folded_gep_offset:
; CHECK: .functype load_splat_v2i64_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load64_splat 8
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds i64, i64* %p, i32 1
%e = load i64, i64* %s
%v1 = insertelement <2 x i64> undef, i64 %e, i32 0
%v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
ret <2 x i64> %v2
}

define <2 x i64> @load_sext_v2i64_with_folded_gep_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_sext_v2i64_with_folded_gep_offset:
; CHECK: .functype load_sext_v2i64_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i64x2.load32x2_s 8
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 1
%v = load <2 x i32>, <2 x i32>* %s
%v2 = sext <2 x i32> %v to <2 x i64>
ret <2 x i64> %v2
}

define <2 x i64> @load_zext_v2i64_with_folded_gep_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_zext_v2i64_with_folded_gep_offset:
; CHECK: .functype load_zext_v2i64_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i64x2.load32x2_u 8
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 1
%v = load <2 x i32>, <2 x i32>* %s
%v2 = zext <2 x i32> %v to <2 x i64>
ret <2 x i64> %v2
}

define <2 x i32> @load_ext_v2i64_with_folded_gep_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_ext_v2i64_with_folded_gep_offset:
; CHECK: .functype load_ext_v2i64_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i64x2.load32x2_u 8
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 1
%v = load <2 x i32>, <2 x i32>* %s
ret <2 x i32> %v
}

define <2 x i64> @load_v2i64_with_unfolded_gep_negative_offset(<2 x i64>* %p) {
; CHECK-LABEL: load_v2i64_with_unfolded_gep_negative_offset:
; CHECK: .functype load_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const -16
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 -1
%v = load <2 x i64>, <2 x i64>* %s
ret <2 x i64> %v
}

define <2 x i64> @load_splat_v2i64_with_unfolded_gep_negative_offset(i64* %p) {
; CHECK-LABEL: load_splat_v2i64_with_unfolded_gep_negative_offset:
; CHECK: .functype load_splat_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const -8
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load64_splat 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds i64, i64* %p, i32 -1
%e = load i64, i64* %s
%v1 = insertelement <2 x i64> undef, i64 %e, i32 0
%v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
ret <2 x i64> %v2
}

define <2 x i64> @load_sext_v2i64_with_unfolded_gep_negative_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_sext_v2i64_with_unfolded_gep_negative_offset:
; CHECK: .functype load_sext_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const -8
; CHECK-NEXT: i32.add
; CHECK-NEXT: i64x2.load32x2_s 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 -1
%v = load <2 x i32>, <2 x i32>* %s
%v2 = sext <2 x i32> %v to <2 x i64>
ret <2 x i64> %v2
}

define <2 x i64> @load_zext_v2i64_with_unfolded_gep_negative_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_zext_v2i64_with_unfolded_gep_negative_offset:
; CHECK: .functype load_zext_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const -8
; CHECK-NEXT: i32.add
; CHECK-NEXT: i64x2.load32x2_u 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 -1
%v = load <2 x i32>, <2 x i32>* %s
%v2 = zext <2 x i32> %v to <2 x i64>
ret <2 x i64> %v2
}

define <2 x i32> @load_ext_v2i64_with_unfolded_gep_negative_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_ext_v2i64_with_unfolded_gep_negative_offset:
; CHECK: .functype load_ext_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const -8
; CHECK-NEXT: i32.add
; CHECK-NEXT: i64x2.load32x2_u 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 -1
%v = load <2 x i32>, <2 x i32>* %s
ret <2 x i32> %v
}

define <2 x i64> @load_v2i64_with_unfolded_offset(<2 x i64>* %p) {
; CHECK-LABEL: load_v2i64_with_unfolded_offset:
; CHECK: .functype load_v2i64_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint <2 x i64>* %p to i32
%r = add nsw i32 %q, 16
%s = inttoptr i32 %r to <2 x i64>*
%v = load <2 x i64>, <2 x i64>* %s
ret <2 x i64> %v
}

define <2 x i64> @load_splat_v2i64_with_unfolded_offset(i64* %p) {
; CHECK-LABEL: load_splat_v2i64_with_unfolded_offset:
; CHECK: .functype load_splat_v2i64_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load64_splat 0
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint i64* %p to i32
%r = add nsw i32 %q, 16
%s = inttoptr i32 %r to i64*
%e = load i64, i64* %s
%v1 = insertelement <2 x i64> undef, i64 %e, i32 0
%v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
ret <2 x i64> %v2
}

define <2 x i64> @load_sext_v2i64_with_unfolded_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_sext_v2i64_with_unfolded_offset:
; CHECK: .functype load_sext_v2i64_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: i64x2.load32x2_s 0
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint <2 x i32>* %p to i32
%r = add nsw i32 %q, 16
%s = inttoptr i32 %r to <2 x i32>*
%v = load <2 x i32>, <2 x i32>* %s
%v2 = sext <2 x i32> %v to <2 x i64>
ret <2 x i64> %v2
}

define <2 x i64> @load_zext_v2i64_with_unfolded_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_zext_v2i64_with_unfolded_offset:
; CHECK: .functype load_zext_v2i64_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: i64x2.load32x2_u 0
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint <2 x i32>* %p to i32
%r = add nsw i32 %q, 16
%s = inttoptr i32 %r to <2 x i32>*
%v = load <2 x i32>, <2 x i32>* %s
%v2 = zext <2 x i32> %v to <2 x i64>
ret <2 x i64> %v2
}

define <2 x i32> @load_ext_v2i64_with_unfolded_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_ext_v2i64_with_unfolded_offset:
; CHECK: .functype load_ext_v2i64_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: i64x2.load32x2_u 0
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint <2 x i32>* %p to i32
%r = add nsw i32 %q, 16
%s = inttoptr i32 %r to <2 x i32>*
%v = load <2 x i32>, <2 x i32>* %s
ret <2 x i32> %v
}

define <2 x i64> @load_v2i64_with_unfolded_gep_offset(<2 x i64>* %p) {
; CHECK-LABEL: load_v2i64_with_unfolded_gep_offset:
; CHECK: .functype load_v2i64_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr <2 x i64>, <2 x i64>* %p, i32 1
%v = load <2 x i64>, <2 x i64>* %s
ret <2 x i64> %v
}

define <2 x i64> @load_splat_v2i64_with_unfolded_gep_offset(i64* %p) {
; CHECK-LABEL: load_splat_v2i64_with_unfolded_gep_offset:
; CHECK: .functype load_splat_v2i64_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 8
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load64_splat 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr i64, i64* %p, i32 1
%e = load i64, i64* %s
%v1 = insertelement <2 x i64> undef, i64 %e, i32 0
%v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
ret <2 x i64> %v2
}

define <2 x i64> @load_sext_v2i64_with_unfolded_gep_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_sext_v2i64_with_unfolded_gep_offset:
; CHECK: .functype load_sext_v2i64_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 8
; CHECK-NEXT: i32.add
; CHECK-NEXT: i64x2.load32x2_s 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr <2 x i32>, <2 x i32>* %p, i32 1
%v = load <2 x i32>, <2 x i32>* %s
%v2 = sext <2 x i32> %v to <2 x i64>
ret <2 x i64> %v2
}

define <2 x i64> @load_zext_v2i64_with_unfolded_gep_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_zext_v2i64_with_unfolded_gep_offset:
; CHECK: .functype load_zext_v2i64_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 8
; CHECK-NEXT: i32.add
; CHECK-NEXT: i64x2.load32x2_u 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr <2 x i32>, <2 x i32>* %p, i32 1
%v = load <2 x i32>, <2 x i32>* %s
%v2 = zext <2 x i32> %v to <2 x i64>
ret <2 x i64> %v2
}

define <2 x i32> @load_ext_v2i64_with_unfolded_gep_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_ext_v2i64_with_unfolded_gep_offset:
; CHECK: .functype load_ext_v2i64_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 8
; CHECK-NEXT: i32.add
; CHECK-NEXT: i64x2.load32x2_u 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr <2 x i32>, <2 x i32>* %p, i32 1
%v = load <2 x i32>, <2 x i32>* %s
ret <2 x i32> %v
}

define <2 x i64> @load_v2i64_from_numeric_address() {
; CHECK-LABEL: load_v2i64_from_numeric_address:
; CHECK: .functype load_v2i64_from_numeric_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: v128.load 32
; CHECK-NEXT: # fallthrough-return
%s = inttoptr i32 32 to <2 x i64>*
%v = load <2 x i64>, <2 x i64>* %s
ret <2 x i64> %v
}

define <2 x i64> @load_splat_v2i64_from_numeric_address() {
; CHECK-LABEL: load_splat_v2i64_from_numeric_address:
; CHECK: .functype load_splat_v2i64_from_numeric_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: v128.load64_splat 32
; CHECK-NEXT: # fallthrough-return
%s = inttoptr i32 32 to i64*
%e = load i64, i64* %s
%v1 = insertelement <2 x i64> undef, i64 %e, i32 0
%v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
ret <2 x i64> %v2
}

define <2 x i64> @load_sext_v2i64_from_numeric_address() {
; CHECK-LABEL: load_sext_v2i64_from_numeric_address:
; CHECK: .functype load_sext_v2i64_from_numeric_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: i64x2.load32x2_s 32
; CHECK-NEXT: # fallthrough-return
%s = inttoptr i32 32 to <2 x i32>*
%v = load <2 x i32>, <2 x i32>* %s
%v2 = sext <2 x i32> %v to <2 x i64>
ret <2 x i64> %v2
}

define <2 x i64> @load_zext_v2i64_from_numeric_address() {
; CHECK-LABEL: load_zext_v2i64_from_numeric_address:
; CHECK: .functype load_zext_v2i64_from_numeric_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: i64x2.load32x2_u 32
; CHECK-NEXT: # fallthrough-return
%s = inttoptr i32 32 to <2 x i32>*
%v = load <2 x i32>, <2 x i32>* %s
%v2 = zext <2 x i32> %v to <2 x i64>
ret <2 x i64> %v2
}

define <2 x i32> @load_ext_v2i64_from_numeric_address() {
; CHECK-LABEL: load_ext_v2i64_from_numeric_address:
; CHECK: .functype load_ext_v2i64_from_numeric_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: i64x2.load32x2_u 32
; CHECK-NEXT: # fallthrough-return
%s = inttoptr i32 32 to <2 x i32>*
%v = load <2 x i32>, <2 x i32>* %s
ret <2 x i32> %v
}

@gv_v2i64 = global <2 x i64> <i64 42, i64 42>
define <2 x i64> @load_v2i64_from_global_address() {
; CHECK-LABEL: load_v2i64_from_global_address:
; CHECK: .functype load_v2i64_from_global_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: v128.load gv_v2i64
; CHECK-NEXT: # fallthrough-return
%v = load <2 x i64>, <2 x i64>* @gv_v2i64
ret <2 x i64> %v
}

@gv_i64 = global i64 42
define <2 x i64> @load_splat_v2i64_from_global_address() {
; CHECK-LABEL: load_splat_v2i64_from_global_address:
; CHECK: .functype load_splat_v2i64_from_global_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: v128.load64_splat gv_i64
; CHECK-NEXT: # fallthrough-return
%e = load i64, i64* @gv_i64
%v1 = insertelement <2 x i64> undef, i64 %e, i32 0
%v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
ret <2 x i64> %v2
}

@gv_v2i32 = global <2 x i32> <i32 42, i32 42>
define <2 x i64> @load_sext_v2i64_from_global_address() {
; CHECK-LABEL: load_sext_v2i64_from_global_address:
; CHECK: .functype load_sext_v2i64_from_global_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: i64x2.load32x2_s gv_v2i32
; CHECK-NEXT: # fallthrough-return
%v = load <2 x i32>, <2 x i32>* @gv_v2i32
%v2 = sext <2 x i32> %v to <2 x i64>
ret <2 x i64> %v2
}

define <2 x i64> @load_zext_v2i64_from_global_address() {
; CHECK-LABEL: load_zext_v2i64_from_global_address:
; CHECK: .functype load_zext_v2i64_from_global_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: i64x2.load32x2_u gv_v2i32
; CHECK-NEXT: # fallthrough-return
%v = load <2 x i32>, <2 x i32>* @gv_v2i32
%v2 = zext <2 x i32> %v to <2 x i64>
ret <2 x i64> %v2
}

define <2 x i32> @load_ext_v2i64_from_global_address() {
; CHECK-LABEL: load_ext_v2i64_from_global_address:
; CHECK: .functype load_ext_v2i64_from_global_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: i64x2.load32x2_u gv_v2i32
; CHECK-NEXT: # fallthrough-return
%v = load <2 x i32>, <2 x i32>* @gv_v2i32
ret <2 x i32> %v
}

define void @store_v2i64(<2 x i64> %v, <2 x i64>* %p) {
; CHECK-LABEL: store_v2i64:
; CHECK: .functype store_v2i64 (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
store <2 x i64> %v , <2 x i64>* %p
ret void
}

define void @store_v2i64_with_folded_offset(<2 x i64> %v, <2 x i64>* %p) {
; CHECK-LABEL: store_v2i64_with_folded_offset:
; CHECK: .functype store_v2i64_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 16
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint <2 x i64>* %p to i32
%r = add nuw i32 %q, 16
%s = inttoptr i32 %r to <2 x i64>*
store <2 x i64> %v , <2 x i64>* %s
ret void
}

define void @store_v2i64_with_folded_gep_offset(<2 x i64> %v, <2 x i64>* %p) {
; CHECK-LABEL: store_v2i64_with_folded_gep_offset:
; CHECK: .functype store_v2i64_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 16
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 1
store <2 x i64> %v , <2 x i64>* %s
ret void
}

define void @store_v2i64_with_unfolded_gep_negative_offset(<2 x i64> %v, <2 x i64>* %p) {
; CHECK-LABEL: store_v2i64_with_unfolded_gep_negative_offset:
; CHECK: .functype store_v2i64_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const -16
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 -1
store <2 x i64> %v , <2 x i64>* %s
ret void
}

define void @store_v2i64_with_unfolded_offset(<2 x i64> %v, <2 x i64>* %p) {
; CHECK-LABEL: store_v2i64_with_unfolded_offset:
; CHECK: .functype store_v2i64_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint <2 x i64>* %p to i32
%r = add nsw i32 %q, 16
%s = inttoptr i32 %r to <2 x i64>*
store <2 x i64> %v , <2 x i64>* %s
ret void
}

define void @store_v2i64_with_unfolded_gep_offset(<2 x i64> %v, <2 x i64>* %p) {
; CHECK-LABEL: store_v2i64_with_unfolded_gep_offset:
; CHECK: .functype store_v2i64_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr <2 x i64>, <2 x i64>* %p, i32 1
store <2 x i64> %v , <2 x i64>* %s
ret void
}

define void @store_v2i64_to_numeric_address(<2 x i64> %v) {
; CHECK-LABEL: store_v2i64_to_numeric_address:
; CHECK: .functype store_v2i64_to_numeric_address (v128) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 32
; CHECK-NEXT: # fallthrough-return
%s = inttoptr i32 32 to <2 x i64>*
store <2 x i64> %v , <2 x i64>* %s
ret void
}

define void @store_v2i64_to_global_address(<2 x i64> %v) {
; CHECK-LABEL: store_v2i64_to_global_address:
; CHECK: .functype store_v2i64_to_global_address (v128) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store gv_v2i64
; CHECK-NEXT: # fallthrough-return
store <2 x i64> %v , <2 x i64>* @gv_v2i64
ret void
}

; ==============================================================================
; 4 x float
; ==============================================================================

define <4 x float> @load_v4f32(<4 x float>* %p) {
; CHECK-LABEL: load_v4f32:
; CHECK: .functype load_v4f32 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
%v = load <4 x float>, <4 x float>* %p
ret <4 x float> %v
}

define <4 x float> @load_splat_v4f32(float* %p) {
; CHECK-LABEL: load_splat_v4f32:
; CHECK: .functype load_splat_v4f32 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load32_splat 0
; CHECK-NEXT: # fallthrough-return
%e = load float, float* %p
%v1 = insertelement <4 x float> undef, float %e, i32 0
%v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
ret <4 x float> %v2
}

define <4 x float> @load_v4f32_with_folded_offset(<4 x float>* %p) {
; CHECK-LABEL: load_v4f32_with_folded_offset:
; CHECK: .functype load_v4f32_with_folded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 16
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint <4 x float>* %p to i32
%r = add nuw i32 %q, 16
%s = inttoptr i32 %r to <4 x float>*
%v = load <4 x float>, <4 x float>* %s
ret <4 x float> %v
}

define <4 x float> @load_splat_v4f32_with_folded_offset(float* %p) {
; CHECK-LABEL: load_splat_v4f32_with_folded_offset:
; CHECK: .functype load_splat_v4f32_with_folded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load32_splat 16
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint float* %p to i32
%r = add nuw i32 %q, 16
%s = inttoptr i32 %r to float*
%e = load float, float* %s
%v1 = insertelement <4 x float> undef, float %e, i32 0
%v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
ret <4 x float> %v2
}

define <4 x float> @load_v4f32_with_folded_gep_offset(<4 x float>* %p) {
; CHECK-LABEL: load_v4f32_with_folded_gep_offset:
; CHECK: .functype load_v4f32_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 16
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 1
%v = load <4 x float>, <4 x float>* %s
ret <4 x float> %v
}

define <4 x float> @load_splat_v4f32_with_folded_gep_offset(float* %p) {
; CHECK-LABEL: load_splat_v4f32_with_folded_gep_offset:
; CHECK: .functype load_splat_v4f32_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load32_splat 4
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds float, float* %p, i32 1
%e = load float, float* %s
%v1 = insertelement <4 x float> undef, float %e, i32 0
%v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
ret <4 x float> %v2
}

define <4 x float> @load_v4f32_with_unfolded_gep_negative_offset(<4 x float>* %p) {
; CHECK-LABEL: load_v4f32_with_unfolded_gep_negative_offset:
; CHECK: .functype load_v4f32_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const -16
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 -1
%v = load <4 x float>, <4 x float>* %s
ret <4 x float> %v
}

define <4 x float> @load_splat_v4f32_with_unfolded_gep_negative_offset(float* %p) {
; CHECK-LABEL: load_splat_v4f32_with_unfolded_gep_negative_offset:
; CHECK: .functype load_splat_v4f32_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const -4
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load32_splat 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds float, float* %p, i32 -1
%e = load float, float* %s
%v1 = insertelement <4 x float> undef, float %e, i32 0
%v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
ret <4 x float> %v2
}

define <4 x float> @load_v4f32_with_unfolded_offset(<4 x float>* %p) {
; CHECK-LABEL: load_v4f32_with_unfolded_offset:
; CHECK: .functype load_v4f32_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint <4 x float>* %p to i32
%r = add nsw i32 %q, 16
%s = inttoptr i32 %r to <4 x float>*
%v = load <4 x float>, <4 x float>* %s
ret <4 x float> %v
}

define <4 x float> @load_splat_v4f32_with_unfolded_offset(float* %p) {
; CHECK-LABEL: load_splat_v4f32_with_unfolded_offset:
; CHECK: .functype load_splat_v4f32_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load32_splat 0
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint float* %p to i32
%r = add nsw i32 %q, 16
%s = inttoptr i32 %r to float*
%e = load float, float* %s
%v1 = insertelement <4 x float> undef, float %e, i32 0
%v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
ret <4 x float> %v2
}

define <4 x float> @load_v4f32_with_unfolded_gep_offset(<4 x float>* %p) {
; CHECK-LABEL: load_v4f32_with_unfolded_gep_offset:
; CHECK: .functype load_v4f32_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr <4 x float>, <4 x float>* %p, i32 1
%v = load <4 x float>, <4 x float>* %s
ret <4 x float> %v
}

define <4 x float> @load_splat_v4f32_with_unfolded_gep_offset(float* %p) {
; CHECK-LABEL: load_splat_v4f32_with_unfolded_gep_offset:
; CHECK: .functype load_splat_v4f32_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 4
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load32_splat 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr float, float* %p, i32 1
%e = load float, float* %s
%v1 = insertelement <4 x float> undef, float %e, i32 0
%v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
ret <4 x float> %v2
}

define <4 x float> @load_v4f32_from_numeric_address() {
; CHECK-LABEL: load_v4f32_from_numeric_address:
; CHECK: .functype load_v4f32_from_numeric_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: v128.load 32
; CHECK-NEXT: # fallthrough-return
%s = inttoptr i32 32 to <4 x float>*
%v = load <4 x float>, <4 x float>* %s
ret <4 x float> %v
}

define <4 x float> @load_splat_v4f32_from_numeric_address() {
; CHECK-LABEL: load_splat_v4f32_from_numeric_address:
; CHECK: .functype load_splat_v4f32_from_numeric_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: v128.load32_splat 32
; CHECK-NEXT: # fallthrough-return
%s = inttoptr i32 32 to float*
%e = load float, float* %s
%v1 = insertelement <4 x float> undef, float %e, i32 0
%v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
ret <4 x float> %v2
}

@gv_v4f32 = global <4 x float> <float 42., float 42., float 42., float 42.>
define <4 x float> @load_v4f32_from_global_address() {
; CHECK-LABEL: load_v4f32_from_global_address:
; CHECK: .functype load_v4f32_from_global_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: v128.load gv_v4f32
; CHECK-NEXT: # fallthrough-return
%v = load <4 x float>, <4 x float>* @gv_v4f32
ret <4 x float> %v
}

@gv_f32 = global float 42.
define <4 x float> @load_splat_v4f32_from_global_address() {
; CHECK-LABEL: load_splat_v4f32_from_global_address:
; CHECK: .functype load_splat_v4f32_from_global_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: v128.load32_splat gv_f32
; CHECK-NEXT: # fallthrough-return
%e = load float, float* @gv_f32
%v1 = insertelement <4 x float> undef, float %e, i32 0
%v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
ret <4 x float> %v2
}

define void @store_v4f32(<4 x float> %v, <4 x float>* %p) {
; CHECK-LABEL: store_v4f32:
; CHECK: .functype store_v4f32 (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
store <4 x float> %v , <4 x float>* %p
ret void
}

define void @store_v4f32_with_folded_offset(<4 x float> %v, <4 x float>* %p) {
; CHECK-LABEL: store_v4f32_with_folded_offset:
; CHECK: .functype store_v4f32_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 16
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint <4 x float>* %p to i32
%r = add nuw i32 %q, 16
%s = inttoptr i32 %r to <4 x float>*
store <4 x float> %v , <4 x float>* %s
ret void
}

define void @store_v4f32_with_folded_gep_offset(<4 x float> %v, <4 x float>* %p) {
; CHECK-LABEL: store_v4f32_with_folded_gep_offset:
; CHECK: .functype store_v4f32_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 16
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 1
store <4 x float> %v , <4 x float>* %s
ret void
}

define void @store_v4f32_with_unfolded_gep_negative_offset(<4 x float> %v, <4 x float>* %p) {
; CHECK-LABEL: store_v4f32_with_unfolded_gep_negative_offset:
; CHECK: .functype store_v4f32_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const -16
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 -1
store <4 x float> %v , <4 x float>* %s
ret void
}

define void @store_v4f32_with_unfolded_offset(<4 x float> %v, <4 x float>* %p) {
; CHECK-LABEL: store_v4f32_with_unfolded_offset:
; CHECK: .functype store_v4f32_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint <4 x float>* %p to i32
%r = add nsw i32 %q, 16
%s = inttoptr i32 %r to <4 x float>*
store <4 x float> %v , <4 x float>* %s
ret void
}

define void @store_v4f32_with_unfolded_gep_offset(<4 x float> %v, <4 x float>* %p) {
; CHECK-LABEL: store_v4f32_with_unfolded_gep_offset:
; CHECK: .functype store_v4f32_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr <4 x float>, <4 x float>* %p, i32 1
store <4 x float> %v , <4 x float>* %s
ret void
}

define void @store_v4f32_to_numeric_address(<4 x float> %v) {
; CHECK-LABEL: store_v4f32_to_numeric_address:
; CHECK: .functype store_v4f32_to_numeric_address (v128) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 32
; CHECK-NEXT: # fallthrough-return
%s = inttoptr i32 32 to <4 x float>*
store <4 x float> %v , <4 x float>* %s
ret void
}

define void @store_v4f32_to_global_address(<4 x float> %v) {
; CHECK-LABEL: store_v4f32_to_global_address:
; CHECK: .functype store_v4f32_to_global_address (v128) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store gv_v4f32
; CHECK-NEXT: # fallthrough-return
store <4 x float> %v , <4 x float>* @gv_v4f32
ret void
}

; ==============================================================================
; 2 x double
; ==============================================================================
|
|
define <2 x double> @load_v2f64(<2 x double>* %p) {
; CHECK-LABEL: load_v2f64:
; CHECK: .functype load_v2f64 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
%v = load <2 x double>, <2 x double>* %p
ret <2 x double> %v
}

define <2 x double> @load_splat_v2f64(double* %p) {
; CHECK-LABEL: load_splat_v2f64:
; CHECK: .functype load_splat_v2f64 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load64_splat 0
; CHECK-NEXT: # fallthrough-return
%e = load double, double* %p
%v1 = insertelement <2 x double> undef, double %e, i32 0
%v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
ret <2 x double> %v2
}

define <2 x double> @load_v2f64_with_folded_offset(<2 x double>* %p) {
; CHECK-LABEL: load_v2f64_with_folded_offset:
; CHECK: .functype load_v2f64_with_folded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 16
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint <2 x double>* %p to i32
%r = add nuw i32 %q, 16
%s = inttoptr i32 %r to <2 x double>*
%v = load <2 x double>, <2 x double>* %s
ret <2 x double> %v
}

define <2 x double> @load_splat_v2f64_with_folded_offset(double* %p) {
; CHECK-LABEL: load_splat_v2f64_with_folded_offset:
; CHECK: .functype load_splat_v2f64_with_folded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load64_splat 16
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint double* %p to i32
%r = add nuw i32 %q, 16
%s = inttoptr i32 %r to double*
%e = load double, double* %s
%v1 = insertelement <2 x double> undef, double %e, i32 0
%v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
ret <2 x double> %v2
}

define <2 x double> @load_v2f64_with_folded_gep_offset(<2 x double>* %p) {
; CHECK-LABEL: load_v2f64_with_folded_gep_offset:
; CHECK: .functype load_v2f64_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 16
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 1
%v = load <2 x double>, <2 x double>* %s
ret <2 x double> %v
}

define <2 x double> @load_splat_v2f64_with_folded_gep_offset(double* %p) {
; CHECK-LABEL: load_splat_v2f64_with_folded_gep_offset:
; CHECK: .functype load_splat_v2f64_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load64_splat 8
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds double, double* %p, i32 1
%e = load double, double* %s
%v1 = insertelement <2 x double> undef, double %e, i32 0
%v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
ret <2 x double> %v2
}

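; Unfolded cases: negative, non-nuw, and non-inbounds address computations
; must keep an explicit i32.add in front of the load.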
define <2 x double> @load_v2f64_with_unfolded_gep_negative_offset(<2 x double>* %p) {
; CHECK-LABEL: load_v2f64_with_unfolded_gep_negative_offset:
; CHECK: .functype load_v2f64_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const -16
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 -1
%v = load <2 x double>, <2 x double>* %s
ret <2 x double> %v
}

define <2 x double> @load_splat_v2f64_with_unfolded_gep_negative_offset(double* %p) {
; CHECK-LABEL: load_splat_v2f64_with_unfolded_gep_negative_offset:
; CHECK: .functype load_splat_v2f64_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const -8
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load64_splat 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds double, double* %p, i32 -1
%e = load double, double* %s
%v1 = insertelement <2 x double> undef, double %e, i32 0
%v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
ret <2 x double> %v2
}

define <2 x double> @load_v2f64_with_unfolded_offset(<2 x double>* %p) {
; CHECK-LABEL: load_v2f64_with_unfolded_offset:
; CHECK: .functype load_v2f64_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint <2 x double>* %p to i32
%r = add nsw i32 %q, 16
%s = inttoptr i32 %r to <2 x double>*
%v = load <2 x double>, <2 x double>* %s
ret <2 x double> %v
}

define <2 x double> @load_splat_v2f64_with_unfolded_offset(double* %p) {
; CHECK-LABEL: load_splat_v2f64_with_unfolded_offset:
; CHECK: .functype load_splat_v2f64_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load64_splat 0
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint double* %p to i32
%r = add nsw i32 %q, 16
%s = inttoptr i32 %r to double*
%e = load double, double* %s
%v1 = insertelement <2 x double> undef, double %e, i32 0
%v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
ret <2 x double> %v2
}

define <2 x double> @load_v2f64_with_unfolded_gep_offset(<2 x double>* %p) {
; CHECK-LABEL: load_v2f64_with_unfolded_gep_offset:
; CHECK: .functype load_v2f64_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr <2 x double>, <2 x double>* %p, i32 1
%v = load <2 x double>, <2 x double>* %s
ret <2 x double> %v
}

define <2 x double> @load_splat_v2f64_with_unfolded_gep_offset(double* %p) {
; CHECK-LABEL: load_splat_v2f64_with_unfolded_gep_offset:
; CHECK: .functype load_splat_v2f64_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 8
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load64_splat 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr double, double* %p, i32 1
%e = load double, double* %s
%v1 = insertelement <2 x double> undef, double %e, i32 0
%v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
ret <2 x double> %v2
}

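; Constant and global addresses fold into the offset immediate over an
; i32.const 0 base.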
define <2 x double> @load_v2f64_from_numeric_address() {
; CHECK-LABEL: load_v2f64_from_numeric_address:
; CHECK: .functype load_v2f64_from_numeric_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: v128.load 32
; CHECK-NEXT: # fallthrough-return
%s = inttoptr i32 32 to <2 x double>*
%v = load <2 x double>, <2 x double>* %s
ret <2 x double> %v
}

define <2 x double> @load_splat_v2f64_from_numeric_address() {
; CHECK-LABEL: load_splat_v2f64_from_numeric_address:
; CHECK: .functype load_splat_v2f64_from_numeric_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: v128.load64_splat 32
; CHECK-NEXT: # fallthrough-return
%s = inttoptr i32 32 to double*
%e = load double, double* %s
%v1 = insertelement <2 x double> undef, double %e, i32 0
%v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
ret <2 x double> %v2
}

@gv_v2f64 = global <2 x double> <double 42., double 42.>
define <2 x double> @load_v2f64_from_global_address() {
; CHECK-LABEL: load_v2f64_from_global_address:
; CHECK: .functype load_v2f64_from_global_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: v128.load gv_v2f64
; CHECK-NEXT: # fallthrough-return
%v = load <2 x double>, <2 x double>* @gv_v2f64
ret <2 x double> %v
}

@gv_f64 = global double 42.
define <2 x double> @load_splat_v2f64_from_global_address() {
; CHECK-LABEL: load_splat_v2f64_from_global_address:
; CHECK: .functype load_splat_v2f64_from_global_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: v128.load64_splat gv_f64
; CHECK-NEXT: # fallthrough-return
%e = load double, double* @gv_f64
%v1 = insertelement <2 x double> undef, double %e, i32 0
%v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
ret <2 x double> %v2
}

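; Store versions of the same f64x2 addressing tests. Note the operand order:
; the address is pushed before the stored value.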
define void @store_v2f64(<2 x double> %v, <2 x double>* %p) {
; CHECK-LABEL: store_v2f64:
; CHECK: .functype store_v2f64 (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
store <2 x double> %v, <2 x double>* %p
ret void
}

define void @store_v2f64_with_folded_offset(<2 x double> %v, <2 x double>* %p) {
; CHECK-LABEL: store_v2f64_with_folded_offset:
; CHECK: .functype store_v2f64_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 16
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint <2 x double>* %p to i32
%r = add nuw i32 %q, 16
%s = inttoptr i32 %r to <2 x double>*
store <2 x double> %v, <2 x double>* %s
ret void
}

define void @store_v2f64_with_folded_gep_offset(<2 x double> %v, <2 x double>* %p) {
; CHECK-LABEL: store_v2f64_with_folded_gep_offset:
; CHECK: .functype store_v2f64_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 16
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 1
store <2 x double> %v, <2 x double>* %s
ret void
}

define void @store_v2f64_with_unfolded_gep_negative_offset(<2 x double> %v, <2 x double>* %p) {
; CHECK-LABEL: store_v2f64_with_unfolded_gep_negative_offset:
; CHECK: .functype store_v2f64_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const -16
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 -1
store <2 x double> %v, <2 x double>* %s
ret void
}

define void @store_v2f64_with_unfolded_offset(<2 x double> %v, <2 x double>* %p) {
; CHECK-LABEL: store_v2f64_with_unfolded_offset:
; CHECK: .functype store_v2f64_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint <2 x double>* %p to i32
%r = add nsw i32 %q, 16
%s = inttoptr i32 %r to <2 x double>*
store <2 x double> %v, <2 x double>* %s
ret void
}

define void @store_v2f64_with_unfolded_gep_offset(<2 x double> %v, <2 x double>* %p) {
; CHECK-LABEL: store_v2f64_with_unfolded_gep_offset:
; CHECK: .functype store_v2f64_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr <2 x double>, <2 x double>* %p, i32 1
store <2 x double> %v, <2 x double>* %s
ret void
}

define void @store_v2f64_to_numeric_address(<2 x double> %v) {
; CHECK-LABEL: store_v2f64_to_numeric_address:
; CHECK: .functype store_v2f64_to_numeric_address (v128) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 32
; CHECK-NEXT: # fallthrough-return
%s = inttoptr i32 32 to <2 x double>*
store <2 x double> %v, <2 x double>* %s
ret void
}

define void @store_v2f64_to_global_address(<2 x double> %v) {
; CHECK-LABEL: store_v2f64_to_global_address:
; CHECK: .functype store_v2f64_to_global_address (v128) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store gv_v2f64
; CHECK-NEXT: # fallthrough-return
store <2 x double> %v, <2 x double>* @gv_v2f64
ret void
}