Thomas Lively adbadea361 [WebAssembly] Implement truncating vector stores
Rather than expanding truncating stores so that vectors are stored one
lane at a time, lower them, when possible, to a sequence of instructions
that uses the narrowing operations. Since the narrowing operations have
saturating semantics but truncating stores require plain truncation, mask
the stored value to truncate it manually before narrowing. And since
narrowing is a binary operation, pass the original vector as the unused
second argument.

Differential Revision: https://reviews.llvm.org/D84377
2020-07-28 17:46:45 -07:00
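For orientation, the lowered sequence for the plain <8 x i8> store (the
store_narrowing_v8i16 test below) is reproduced here with explanatory
comments; the comments are added for illustration only and do not appear
in llc output. Local 0 holds the stored vector and local 1 the destination
address, matching the test's signature.

  local.get 1                 # push p, the destination address for the final i64.store
  i32.const 16711935          # 0x00FF00FF: low-byte mask for each 16-bit lane
  i32x4.splat                 # broadcast the mask across the whole v128
  local.get 0                 # push v, one <8 x i8> element per 16-bit lane
  v128.and                    # truncate manually: clear the high byte of every lane
  local.get 0                 # the unused second operand (narrowing is binary)
  i8x16.narrow_i16x8_u        # pack the masked low bytes into the low 8 lanes
  i64x2.extract_lane 0        # extract the 64 bits holding the packed result
  i64.store 0                 # store the 8 packed bytes to p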


; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -verify-machineinstrs -mattr=+simd128 | FileCheck %s
; Test SIMD loads and stores
target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"
; ==============================================================================
; 16 x i8
; ==============================================================================
define <16 x i8> @load_v16i8(<16 x i8>* %p) {
; CHECK-LABEL: load_v16i8:
; CHECK: .functype load_v16i8 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
%v = load <16 x i8>, <16 x i8>* %p
ret <16 x i8> %v
}
define <16 x i8> @load_splat_v16i8(i8* %p) {
; CHECK-LABEL: load_splat_v16i8:
; CHECK: .functype load_splat_v16i8 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v8x16.load_splat 0
; CHECK-NEXT: # fallthrough-return
%e = load i8, i8* %p
%v1 = insertelement <16 x i8> undef, i8 %e, i32 0
%v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
ret <16 x i8> %v2
}
define <16 x i8> @load_v16i8_with_folded_offset(<16 x i8>* %p) {
; CHECK-LABEL: load_v16i8_with_folded_offset:
; CHECK: .functype load_v16i8_with_folded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 16
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint <16 x i8>* %p to i32
%r = add nuw i32 %q, 16
%s = inttoptr i32 %r to <16 x i8>*
%v = load <16 x i8>, <16 x i8>* %s
ret <16 x i8> %v
}
define <16 x i8> @load_splat_v16i8_with_folded_offset(i8* %p) {
; CHECK-LABEL: load_splat_v16i8_with_folded_offset:
; CHECK: .functype load_splat_v16i8_with_folded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v8x16.load_splat 16
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint i8* %p to i32
%r = add nuw i32 %q, 16
%s = inttoptr i32 %r to i8*
%e = load i8, i8* %s
%v1 = insertelement <16 x i8> undef, i8 %e, i32 0
%v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
ret <16 x i8> %v2
}
define <16 x i8> @load_v16i8_with_folded_gep_offset(<16 x i8>* %p) {
; CHECK-LABEL: load_v16i8_with_folded_gep_offset:
; CHECK: .functype load_v16i8_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 16
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 1
%v = load <16 x i8>, <16 x i8>* %s
ret <16 x i8> %v
}
define <16 x i8> @load_splat_v16i8_with_folded_gep_offset(i8* %p) {
; CHECK-LABEL: load_splat_v16i8_with_folded_gep_offset:
; CHECK: .functype load_splat_v16i8_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v8x16.load_splat 1
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds i8, i8* %p, i32 1
%e = load i8, i8* %s
%v1 = insertelement <16 x i8> undef, i8 %e, i32 0
%v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
ret <16 x i8> %v2
}
define <16 x i8> @load_v16i8_with_unfolded_gep_negative_offset(<16 x i8>* %p) {
; CHECK-LABEL: load_v16i8_with_unfolded_gep_negative_offset:
; CHECK: .functype load_v16i8_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const -16
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 -1
%v = load <16 x i8>, <16 x i8>* %s
ret <16 x i8> %v
}
define <16 x i8> @load_splat_v16i8_with_unfolded_gep_negative_offset(i8* %p) {
; CHECK-LABEL: load_splat_v16i8_with_unfolded_gep_negative_offset:
; CHECK: .functype load_splat_v16i8_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const -1
; CHECK-NEXT: i32.add
; CHECK-NEXT: v8x16.load_splat 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds i8, i8* %p, i32 -1
%e = load i8, i8* %s
%v1 = insertelement <16 x i8> undef, i8 %e, i32 0
%v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
ret <16 x i8> %v2
}
define <16 x i8> @load_v16i8_with_unfolded_offset(<16 x i8>* %p) {
; CHECK-LABEL: load_v16i8_with_unfolded_offset:
; CHECK: .functype load_v16i8_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint <16 x i8>* %p to i32
%r = add nsw i32 %q, 16
%s = inttoptr i32 %r to <16 x i8>*
%v = load <16 x i8>, <16 x i8>* %s
ret <16 x i8> %v
}
define <16 x i8> @load_splat_v16i8_with_unfolded_offset(i8* %p) {
; CHECK-LABEL: load_splat_v16i8_with_unfolded_offset:
; CHECK: .functype load_splat_v16i8_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: v8x16.load_splat 0
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint i8* %p to i32
%r = add nsw i32 %q, 16
%s = inttoptr i32 %r to i8*
%e = load i8, i8* %s
%v1 = insertelement <16 x i8> undef, i8 %e, i32 0
%v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
ret <16 x i8> %v2
}
define <16 x i8> @load_v16i8_with_unfolded_gep_offset(<16 x i8>* %p) {
; CHECK-LABEL: load_v16i8_with_unfolded_gep_offset:
; CHECK: .functype load_v16i8_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr <16 x i8>, <16 x i8>* %p, i32 1
%v = load <16 x i8>, <16 x i8>* %s
ret <16 x i8> %v
}
define <16 x i8> @load_splat_v16i8_with_unfolded_gep_offset(i8* %p) {
; CHECK-LABEL: load_splat_v16i8_with_unfolded_gep_offset:
; CHECK: .functype load_splat_v16i8_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.add
; CHECK-NEXT: v8x16.load_splat 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr i8, i8* %p, i32 1
%e = load i8, i8* %s
%v1 = insertelement <16 x i8> undef, i8 %e, i32 0
%v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
ret <16 x i8> %v2
}
define <16 x i8> @load_v16i8_from_numeric_address() {
; CHECK-LABEL: load_v16i8_from_numeric_address:
; CHECK: .functype load_v16i8_from_numeric_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: v128.load 32
; CHECK-NEXT: # fallthrough-return
%s = inttoptr i32 32 to <16 x i8>*
%v = load <16 x i8>, <16 x i8>* %s
ret <16 x i8> %v
}
define <16 x i8> @load_splat_v16i8_from_numeric_address() {
; CHECK-LABEL: load_splat_v16i8_from_numeric_address:
; CHECK: .functype load_splat_v16i8_from_numeric_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: v8x16.load_splat 32
; CHECK-NEXT: # fallthrough-return
%s = inttoptr i32 32 to i8*
%e = load i8, i8* %s
%v1 = insertelement <16 x i8> undef, i8 %e, i32 0
%v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
ret <16 x i8> %v2
}
@gv_v16i8 = global <16 x i8> <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
define <16 x i8> @load_v16i8_from_global_address() {
; CHECK-LABEL: load_v16i8_from_global_address:
; CHECK: .functype load_v16i8_from_global_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: v128.load gv_v16i8
; CHECK-NEXT: # fallthrough-return
%v = load <16 x i8>, <16 x i8>* @gv_v16i8
ret <16 x i8> %v
}
@gv_i8 = global i8 42
define <16 x i8> @load_splat_v16i8_from_global_address() {
; CHECK-LABEL: load_splat_v16i8_from_global_address:
; CHECK: .functype load_splat_v16i8_from_global_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: v8x16.load_splat gv_i8
; CHECK-NEXT: # fallthrough-return
%e = load i8, i8* @gv_i8
%v1 = insertelement <16 x i8> undef, i8 %e, i32 0
%v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
ret <16 x i8> %v2
}
define void @store_v16i8(<16 x i8> %v, <16 x i8>* %p) {
; CHECK-LABEL: store_v16i8:
; CHECK: .functype store_v16i8 (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
store <16 x i8> %v , <16 x i8>* %p
ret void
}
define void @store_v16i8_with_folded_offset(<16 x i8> %v, <16 x i8>* %p) {
; CHECK-LABEL: store_v16i8_with_folded_offset:
; CHECK: .functype store_v16i8_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 16
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint <16 x i8>* %p to i32
%r = add nuw i32 %q, 16
%s = inttoptr i32 %r to <16 x i8>*
store <16 x i8> %v , <16 x i8>* %s
ret void
}
define void @store_v16i8_with_folded_gep_offset(<16 x i8> %v, <16 x i8>* %p) {
; CHECK-LABEL: store_v16i8_with_folded_gep_offset:
; CHECK: .functype store_v16i8_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 16
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 1
store <16 x i8> %v , <16 x i8>* %s
ret void
}
define void @store_v16i8_with_unfolded_gep_negative_offset(<16 x i8> %v, <16 x i8>* %p) {
; CHECK-LABEL: store_v16i8_with_unfolded_gep_negative_offset:
; CHECK: .functype store_v16i8_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const -16
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 -1
store <16 x i8> %v , <16 x i8>* %s
ret void
}
define void @store_v16i8_with_unfolded_offset(<16 x i8> %v, <16 x i8>* %p) {
; CHECK-LABEL: store_v16i8_with_unfolded_offset:
; CHECK: .functype store_v16i8_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint <16 x i8>* %p to i32
%r = add nsw i32 %q, 16
%s = inttoptr i32 %r to <16 x i8>*
store <16 x i8> %v , <16 x i8>* %s
ret void
}
define void @store_v16i8_with_unfolded_gep_offset(<16 x i8> %v, <16 x i8>* %p) {
; CHECK-LABEL: store_v16i8_with_unfolded_gep_offset:
; CHECK: .functype store_v16i8_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr <16 x i8>, <16 x i8>* %p, i32 1
store <16 x i8> %v , <16 x i8>* %s
ret void
}
define void @store_v16i8_to_numeric_address(<16 x i8> %v) {
; CHECK-LABEL: store_v16i8_to_numeric_address:
; CHECK: .functype store_v16i8_to_numeric_address (v128) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 32
; CHECK-NEXT: # fallthrough-return
%s = inttoptr i32 32 to <16 x i8>*
store <16 x i8> %v , <16 x i8>* %s
ret void
}
define void @store_v16i8_to_global_address(<16 x i8> %v) {
; CHECK-LABEL: store_v16i8_to_global_address:
; CHECK: .functype store_v16i8_to_global_address (v128) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store gv_v16i8
; CHECK-NEXT: # fallthrough-return
store <16 x i8> %v , <16 x i8>* @gv_v16i8
ret void
}
; ==============================================================================
; 8 x i16
; ==============================================================================
define <8 x i16> @load_v8i16(<8 x i16>* %p) {
; CHECK-LABEL: load_v8i16:
; CHECK: .functype load_v8i16 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
%v = load <8 x i16>, <8 x i16>* %p
ret <8 x i16> %v
}
define <8 x i16> @load_splat_v8i16(i16* %p) {
; CHECK-LABEL: load_splat_v8i16:
; CHECK: .functype load_splat_v8i16 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v16x8.load_splat 0
; CHECK-NEXT: # fallthrough-return
%e = load i16, i16* %p
%v1 = insertelement <8 x i16> undef, i16 %e, i32 0
%v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
ret <8 x i16> %v2
}
define <8 x i16> @load_sext_v8i16(<8 x i8>* %p) {
; CHECK-LABEL: load_sext_v8i16:
; CHECK: .functype load_sext_v8i16 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i16x8.load8x8_s 0
; CHECK-NEXT: # fallthrough-return
%v = load <8 x i8>, <8 x i8>* %p
%v2 = sext <8 x i8> %v to <8 x i16>
ret <8 x i16> %v2
}
define <8 x i16> @load_zext_v8i16(<8 x i8>* %p) {
; CHECK-LABEL: load_zext_v8i16:
; CHECK: .functype load_zext_v8i16 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i16x8.load8x8_u 0
; CHECK-NEXT: # fallthrough-return
%v = load <8 x i8>, <8 x i8>* %p
%v2 = zext <8 x i8> %v to <8 x i16>
ret <8 x i16> %v2
}
define <8 x i8> @load_ext_v8i16(<8 x i8>* %p) {
; CHECK-LABEL: load_ext_v8i16:
; CHECK: .functype load_ext_v8i16 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i16x8.load8x8_u 0
; CHECK-NEXT: # fallthrough-return
%v = load <8 x i8>, <8 x i8>* %p
ret <8 x i8> %v
}
define <8 x i16> @load_v8i16_with_folded_offset(<8 x i16>* %p) {
; CHECK-LABEL: load_v8i16_with_folded_offset:
; CHECK: .functype load_v8i16_with_folded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 16
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint <8 x i16>* %p to i32
%r = add nuw i32 %q, 16
%s = inttoptr i32 %r to <8 x i16>*
%v = load <8 x i16>, <8 x i16>* %s
ret <8 x i16> %v
}
define <8 x i16> @load_splat_v8i16_with_folded_offset(i16* %p) {
; CHECK-LABEL: load_splat_v8i16_with_folded_offset:
; CHECK: .functype load_splat_v8i16_with_folded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v16x8.load_splat 16
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint i16* %p to i32
%r = add nuw i32 %q, 16
%s = inttoptr i32 %r to i16*
%e = load i16, i16* %s
%v1 = insertelement <8 x i16> undef, i16 %e, i32 0
%v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
ret <8 x i16> %v2
}
define <8 x i16> @load_sext_v8i16_with_folded_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_sext_v8i16_with_folded_offset:
; CHECK: .functype load_sext_v8i16_with_folded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i16x8.load8x8_s 16
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint <8 x i8>* %p to i32
%r = add nuw i32 %q, 16
%s = inttoptr i32 %r to <8 x i8>*
%v = load <8 x i8>, <8 x i8>* %s
%v2 = sext <8 x i8> %v to <8 x i16>
ret <8 x i16> %v2
}
define <8 x i16> @load_zext_v8i16_with_folded_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_zext_v8i16_with_folded_offset:
; CHECK: .functype load_zext_v8i16_with_folded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i16x8.load8x8_u 16
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint <8 x i8>* %p to i32
%r = add nuw i32 %q, 16
%s = inttoptr i32 %r to <8 x i8>*
%v = load <8 x i8>, <8 x i8>* %s
%v2 = zext <8 x i8> %v to <8 x i16>
ret <8 x i16> %v2
}
define <8 x i8> @load_ext_v8i16_with_folded_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_ext_v8i16_with_folded_offset:
; CHECK: .functype load_ext_v8i16_with_folded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i16x8.load8x8_u 16
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint <8 x i8>* %p to i32
%r = add nuw i32 %q, 16
%s = inttoptr i32 %r to <8 x i8>*
%v = load <8 x i8>, <8 x i8>* %s
ret <8 x i8> %v
}
define <8 x i16> @load_v8i16_with_folded_gep_offset(<8 x i16>* %p) {
; CHECK-LABEL: load_v8i16_with_folded_gep_offset:
; CHECK: .functype load_v8i16_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 16
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 1
%v = load <8 x i16>, <8 x i16>* %s
ret <8 x i16> %v
}
define <8 x i16> @load_splat_v8i16_with_folded_gep_offset(i16* %p) {
; CHECK-LABEL: load_splat_v8i16_with_folded_gep_offset:
; CHECK: .functype load_splat_v8i16_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v16x8.load_splat 2
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds i16, i16* %p, i32 1
%e = load i16, i16* %s
%v1 = insertelement <8 x i16> undef, i16 %e, i32 0
%v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
ret <8 x i16> %v2
}
define <8 x i16> @load_sext_v8i16_with_folded_gep_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_sext_v8i16_with_folded_gep_offset:
; CHECK: .functype load_sext_v8i16_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i16x8.load8x8_s 8
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 1
%v = load <8 x i8>, <8 x i8>* %s
%v2 = sext <8 x i8> %v to <8 x i16>
ret <8 x i16> %v2
}
define <8 x i16> @load_zext_v8i16_with_folded_gep_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_zext_v8i16_with_folded_gep_offset:
; CHECK: .functype load_zext_v8i16_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i16x8.load8x8_u 8
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 1
%v = load <8 x i8>, <8 x i8>* %s
%v2 = zext <8 x i8> %v to <8 x i16>
ret <8 x i16> %v2
}
define <8 x i8> @load_ext_v8i16_with_folded_gep_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_ext_v8i16_with_folded_gep_offset:
; CHECK: .functype load_ext_v8i16_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i16x8.load8x8_u 8
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 1
%v = load <8 x i8>, <8 x i8>* %s
ret <8 x i8> %v
}
define <8 x i16> @load_v8i16_with_unfolded_gep_negative_offset(<8 x i16>* %p) {
; CHECK-LABEL: load_v8i16_with_unfolded_gep_negative_offset:
; CHECK: .functype load_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const -16
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 -1
%v = load <8 x i16>, <8 x i16>* %s
ret <8 x i16> %v
}
define <8 x i16> @load_splat_v8i16_with_unfolded_gep_negative_offset(i16* %p) {
; CHECK-LABEL: load_splat_v8i16_with_unfolded_gep_negative_offset:
; CHECK: .functype load_splat_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const -2
; CHECK-NEXT: i32.add
; CHECK-NEXT: v16x8.load_splat 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds i16, i16* %p, i32 -1
%e = load i16, i16* %s
%v1 = insertelement <8 x i16> undef, i16 %e, i32 0
%v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
ret <8 x i16> %v2
}
define <8 x i16> @load_sext_v8i16_with_unfolded_gep_negative_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_sext_v8i16_with_unfolded_gep_negative_offset:
; CHECK: .functype load_sext_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const -8
; CHECK-NEXT: i32.add
; CHECK-NEXT: i16x8.load8x8_s 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 -1
%v = load <8 x i8>, <8 x i8>* %s
%v2 = sext <8 x i8> %v to <8 x i16>
ret <8 x i16> %v2
}
define <8 x i16> @load_zext_v8i16_with_unfolded_gep_negative_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_zext_v8i16_with_unfolded_gep_negative_offset:
; CHECK: .functype load_zext_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const -8
; CHECK-NEXT: i32.add
; CHECK-NEXT: i16x8.load8x8_u 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 -1
%v = load <8 x i8>, <8 x i8>* %s
%v2 = zext <8 x i8> %v to <8 x i16>
ret <8 x i16> %v2
}
define <8 x i8> @load_ext_v8i16_with_unfolded_gep_negative_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_ext_v8i16_with_unfolded_gep_negative_offset:
; CHECK: .functype load_ext_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const -8
; CHECK-NEXT: i32.add
; CHECK-NEXT: i16x8.load8x8_u 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 -1
%v = load <8 x i8>, <8 x i8>* %s
ret <8 x i8> %v
}
define <8 x i16> @load_v8i16_with_unfolded_offset(<8 x i16>* %p) {
; CHECK-LABEL: load_v8i16_with_unfolded_offset:
; CHECK: .functype load_v8i16_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint <8 x i16>* %p to i32
%r = add nsw i32 %q, 16
%s = inttoptr i32 %r to <8 x i16>*
%v = load <8 x i16>, <8 x i16>* %s
ret <8 x i16> %v
}
define <8 x i16> @load_splat_v8i16_with_unfolded_offset(i16* %p) {
; CHECK-LABEL: load_splat_v8i16_with_unfolded_offset:
; CHECK: .functype load_splat_v8i16_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: v16x8.load_splat 0
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint i16* %p to i32
%r = add nsw i32 %q, 16
%s = inttoptr i32 %r to i16*
%e = load i16, i16* %s
%v1 = insertelement <8 x i16> undef, i16 %e, i32 0
%v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
ret <8 x i16> %v2
}
define <8 x i16> @load_sext_v8i16_with_unfolded_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_sext_v8i16_with_unfolded_offset:
; CHECK: .functype load_sext_v8i16_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: i16x8.load8x8_s 0
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint <8 x i8>* %p to i32
%r = add nsw i32 %q, 16
%s = inttoptr i32 %r to <8 x i8>*
%v = load <8 x i8>, <8 x i8>* %s
%v2 = sext <8 x i8> %v to <8 x i16>
ret <8 x i16> %v2
}
define <8 x i16> @load_zext_v8i16_with_unfolded_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_zext_v8i16_with_unfolded_offset:
; CHECK: .functype load_zext_v8i16_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: i16x8.load8x8_u 0
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint <8 x i8>* %p to i32
%r = add nsw i32 %q, 16
%s = inttoptr i32 %r to <8 x i8>*
%v = load <8 x i8>, <8 x i8>* %s
%v2 = zext <8 x i8> %v to <8 x i16>
ret <8 x i16> %v2
}
define <8 x i8> @load_ext_v8i16_with_unfolded_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_ext_v8i16_with_unfolded_offset:
; CHECK: .functype load_ext_v8i16_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: i16x8.load8x8_u 0
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint <8 x i8>* %p to i32
%r = add nsw i32 %q, 16
%s = inttoptr i32 %r to <8 x i8>*
%v = load <8 x i8>, <8 x i8>* %s
ret <8 x i8> %v
}
define <8 x i16> @load_v8i16_with_unfolded_gep_offset(<8 x i16>* %p) {
; CHECK-LABEL: load_v8i16_with_unfolded_gep_offset:
; CHECK: .functype load_v8i16_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr <8 x i16>, <8 x i16>* %p, i32 1
%v = load <8 x i16>, <8 x i16>* %s
ret <8 x i16> %v
}
define <8 x i16> @load_splat_v8i16_with_unfolded_gep_offset(i16* %p) {
; CHECK-LABEL: load_splat_v8i16_with_unfolded_gep_offset:
; CHECK: .functype load_splat_v8i16_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 2
; CHECK-NEXT: i32.add
; CHECK-NEXT: v16x8.load_splat 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr i16, i16* %p, i32 1
%e = load i16, i16* %s
%v1 = insertelement <8 x i16> undef, i16 %e, i32 0
%v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
ret <8 x i16> %v2
}
define <8 x i16> @load_sext_v8i16_with_unfolded_gep_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_sext_v8i16_with_unfolded_gep_offset:
; CHECK: .functype load_sext_v8i16_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 8
; CHECK-NEXT: i32.add
; CHECK-NEXT: i16x8.load8x8_s 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr <8 x i8>, <8 x i8>* %p, i32 1
%v = load <8 x i8>, <8 x i8>* %s
%v2 = sext <8 x i8> %v to <8 x i16>
ret <8 x i16> %v2
}
define <8 x i16> @load_zext_v8i16_with_unfolded_gep_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_zext_v8i16_with_unfolded_gep_offset:
; CHECK: .functype load_zext_v8i16_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 8
; CHECK-NEXT: i32.add
; CHECK-NEXT: i16x8.load8x8_u 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr <8 x i8>, <8 x i8>* %p, i32 1
%v = load <8 x i8>, <8 x i8>* %s
%v2 = zext <8 x i8> %v to <8 x i16>
ret <8 x i16> %v2
}
define <8 x i8> @load_ext_v8i16_with_unfolded_gep_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_ext_v8i16_with_unfolded_gep_offset:
; CHECK: .functype load_ext_v8i16_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 8
; CHECK-NEXT: i32.add
; CHECK-NEXT: i16x8.load8x8_u 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr <8 x i8>, <8 x i8>* %p, i32 1
%v = load <8 x i8>, <8 x i8>* %s
ret <8 x i8> %v
}
define <8 x i16> @load_v8i16_from_numeric_address() {
; CHECK-LABEL: load_v8i16_from_numeric_address:
; CHECK: .functype load_v8i16_from_numeric_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: v128.load 32
; CHECK-NEXT: # fallthrough-return
%s = inttoptr i32 32 to <8 x i16>*
%v = load <8 x i16>, <8 x i16>* %s
ret <8 x i16> %v
}
define <8 x i16> @load_splat_v8i16_from_numeric_address() {
; CHECK-LABEL: load_splat_v8i16_from_numeric_address:
; CHECK: .functype load_splat_v8i16_from_numeric_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: v16x8.load_splat 32
; CHECK-NEXT: # fallthrough-return
%s = inttoptr i32 32 to i16*
%e = load i16, i16* %s
%v1 = insertelement <8 x i16> undef, i16 %e, i32 0
%v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
ret <8 x i16> %v2
}
define <8 x i16> @load_sext_v8i16_from_numeric_address() {
; CHECK-LABEL: load_sext_v8i16_from_numeric_address:
; CHECK: .functype load_sext_v8i16_from_numeric_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: i16x8.load8x8_s 32
; CHECK-NEXT: # fallthrough-return
%s = inttoptr i32 32 to <8 x i8>*
%v = load <8 x i8>, <8 x i8>* %s
%v2 = sext <8 x i8> %v to <8 x i16>
ret <8 x i16> %v2
}
define <8 x i16> @load_zext_v8i16_from_numeric_address() {
; CHECK-LABEL: load_zext_v8i16_from_numeric_address:
; CHECK: .functype load_zext_v8i16_from_numeric_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: i16x8.load8x8_u 32
; CHECK-NEXT: # fallthrough-return
%s = inttoptr i32 32 to <8 x i8>*
%v = load <8 x i8>, <8 x i8>* %s
%v2 = zext <8 x i8> %v to <8 x i16>
ret <8 x i16> %v2
}
define <8 x i8> @load_ext_v8i16_from_numeric_address() {
; CHECK-LABEL: load_ext_v8i16_from_numeric_address:
; CHECK: .functype load_ext_v8i16_from_numeric_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: i16x8.load8x8_u 32
; CHECK-NEXT: # fallthrough-return
%s = inttoptr i32 32 to <8 x i8>*
%v = load <8 x i8>, <8 x i8>* %s
ret <8 x i8> %v
}
@gv_v8i16 = global <8 x i16> <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>
define <8 x i16> @load_v8i16_from_global_address() {
; CHECK-LABEL: load_v8i16_from_global_address:
; CHECK: .functype load_v8i16_from_global_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: v128.load gv_v8i16
; CHECK-NEXT: # fallthrough-return
%v = load <8 x i16>, <8 x i16>* @gv_v8i16
ret <8 x i16> %v
}
@gv_i16 = global i16 42
define <8 x i16> @load_splat_v8i16_from_global_address() {
; CHECK-LABEL: load_splat_v8i16_from_global_address:
; CHECK: .functype load_splat_v8i16_from_global_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: v16x8.load_splat gv_i16
; CHECK-NEXT: # fallthrough-return
%e = load i16, i16* @gv_i16
%v1 = insertelement <8 x i16> undef, i16 %e, i32 0
%v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
ret <8 x i16> %v2
}
@gv_v8i8 = global <8 x i8> <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
define <8 x i16> @load_sext_v8i16_from_global_address() {
; CHECK-LABEL: load_sext_v8i16_from_global_address:
; CHECK: .functype load_sext_v8i16_from_global_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: i16x8.load8x8_s gv_v8i8
; CHECK-NEXT: # fallthrough-return
%v = load <8 x i8>, <8 x i8>* @gv_v8i8
%v2 = sext <8 x i8> %v to <8 x i16>
ret <8 x i16> %v2
}
define <8 x i16> @load_zext_v8i16_from_global_address() {
; CHECK-LABEL: load_zext_v8i16_from_global_address:
; CHECK: .functype load_zext_v8i16_from_global_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: i16x8.load8x8_u gv_v8i8
; CHECK-NEXT: # fallthrough-return
%v = load <8 x i8>, <8 x i8>* @gv_v8i8
%v2 = zext <8 x i8> %v to <8 x i16>
ret <8 x i16> %v2
}
define <8 x i8> @load_ext_v8i16_from_global_address() {
; CHECK-LABEL: load_ext_v8i16_from_global_address:
; CHECK: .functype load_ext_v8i16_from_global_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: i16x8.load8x8_u gv_v8i8
; CHECK-NEXT: # fallthrough-return
%v = load <8 x i8>, <8 x i8>* @gv_v8i8
ret <8 x i8> %v
}
define void @store_v8i16(<8 x i16> %v, <8 x i16>* %p) {
; CHECK-LABEL: store_v8i16:
; CHECK: .functype store_v8i16 (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
store <8 x i16> %v , <8 x i16>* %p
ret void
}
define void @store_narrowing_v8i16(<8 x i8> %v, <8 x i8>* %p) {
; CHECK-LABEL: store_narrowing_v8i16:
; CHECK: .functype store_narrowing_v8i16 (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const 16711935
; CHECK-NEXT: i32x4.splat
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.and
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.narrow_i16x8_u
; CHECK-NEXT: i64x2.extract_lane 0
; CHECK-NEXT: i64.store 0
; CHECK-NEXT: # fallthrough-return
store <8 x i8> %v, <8 x i8>* %p
ret void
}
define void @store_v8i16_with_folded_offset(<8 x i16> %v, <8 x i16>* %p) {
; CHECK-LABEL: store_v8i16_with_folded_offset:
; CHECK: .functype store_v8i16_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 16
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint <8 x i16>* %p to i32
%r = add nuw i32 %q, 16
%s = inttoptr i32 %r to <8 x i16>*
store <8 x i16> %v , <8 x i16>* %s
ret void
}
define void @store_narrowing_v8i16_with_folded_offset(<8 x i8> %v, <8 x i8>* %p) {
; CHECK-LABEL: store_narrowing_v8i16_with_folded_offset:
; CHECK: .functype store_narrowing_v8i16_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const 16711935
; CHECK-NEXT: i32x4.splat
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.and
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.narrow_i16x8_u
; CHECK-NEXT: i64x2.extract_lane 0
; CHECK-NEXT: i64.store 16
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint <8 x i8>* %p to i32
%r = add nuw i32 %q, 16
%s = inttoptr i32 %r to <8 x i8>*
store <8 x i8> %v , <8 x i8>* %s
ret void
}
define void @store_v8i16_with_folded_gep_offset(<8 x i16> %v, <8 x i16>* %p) {
; CHECK-LABEL: store_v8i16_with_folded_gep_offset:
; CHECK: .functype store_v8i16_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 16
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 1
store <8 x i16> %v , <8 x i16>* %s
ret void
}
define void @store_narrowing_v8i16_with_folded_gep_offset(<8 x i8> %v, <8 x i8>* %p) {
; CHECK-LABEL: store_narrowing_v8i16_with_folded_gep_offset:
; CHECK: .functype store_narrowing_v8i16_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const 16711935
; CHECK-NEXT: i32x4.splat
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.and
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.narrow_i16x8_u
; CHECK-NEXT: i64x2.extract_lane 0
; CHECK-NEXT: i64.store 8
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 1
store <8 x i8> %v , <8 x i8>* %s
ret void
}
define void @store_v8i16_with_unfolded_gep_negative_offset(<8 x i16> %v, <8 x i16>* %p) {
; CHECK-LABEL: store_v8i16_with_unfolded_gep_negative_offset:
; CHECK: .functype store_v8i16_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const -16
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 -1
store <8 x i16> %v , <8 x i16>* %s
ret void
}
define void @store_narrowing_v8i16_with_unfolded_gep_negative_offset(<8 x i8> %v, <8 x i8>* %p) {
; CHECK-LABEL: store_narrowing_v8i16_with_unfolded_gep_negative_offset:
; CHECK: .functype store_narrowing_v8i16_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const -8
; CHECK-NEXT: i32.add
; CHECK-NEXT: i32.const 16711935
; CHECK-NEXT: i32x4.splat
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.and
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.narrow_i16x8_u
; CHECK-NEXT: i64x2.extract_lane 0
; CHECK-NEXT: i64.store 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 -1
store <8 x i8> %v , <8 x i8>* %s
ret void
}
define void @store_v8i16_with_unfolded_offset(<8 x i16> %v, <8 x i16>* %p) {
; CHECK-LABEL: store_v8i16_with_unfolded_offset:
; CHECK: .functype store_v8i16_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint <8 x i16>* %p to i32
%r = add nsw i32 %q, 16
%s = inttoptr i32 %r to <8 x i16>*
store <8 x i16> %v , <8 x i16>* %s
ret void
}
define void @store_narrowing_v8i16_with_unfolded_offset(<8 x i8> %v, <8 x i8>* %p) {
; CHECK-LABEL: store_narrowing_v8i16_with_unfolded_offset:
; CHECK: .functype store_narrowing_v8i16_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: i32.const 16711935
; CHECK-NEXT: i32x4.splat
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.and
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.narrow_i16x8_u
; CHECK-NEXT: i64x2.extract_lane 0
; CHECK-NEXT: i64.store 0
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint <8 x i8>* %p to i32
%r = add nsw i32 %q, 16
%s = inttoptr i32 %r to <8 x i8>*
store <8 x i8> %v , <8 x i8>* %s
ret void
}
define void @store_v8i16_with_unfolded_gep_offset(<8 x i16> %v, <8 x i16>* %p) {
; CHECK-LABEL: store_v8i16_with_unfolded_gep_offset:
; CHECK: .functype store_v8i16_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr <8 x i16>, <8 x i16>* %p, i32 1
store <8 x i16> %v , <8 x i16>* %s
ret void
}
define void @store_narrowing_v8i16_with_unfolded_gep_offset(<8 x i8> %v, <8 x i8>* %p) {
; CHECK-LABEL: store_narrowing_v8i16_with_unfolded_gep_offset:
; CHECK: .functype store_narrowing_v8i16_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const 8
; CHECK-NEXT: i32.add
; CHECK-NEXT: i32.const 16711935
; CHECK-NEXT: i32x4.splat
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.and
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.narrow_i16x8_u
; CHECK-NEXT: i64x2.extract_lane 0
; CHECK-NEXT: i64.store 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr <8 x i8>, <8 x i8>* %p, i32 1
store <8 x i8> %v , <8 x i8>* %s
ret void
}
define void @store_v8i16_to_numeric_address(<8 x i16> %v) {
; CHECK-LABEL: store_v8i16_to_numeric_address:
; CHECK: .functype store_v8i16_to_numeric_address (v128) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 32
; CHECK-NEXT: # fallthrough-return
%s = inttoptr i32 32 to <8 x i16>*
store <8 x i16> %v , <8 x i16>* %s
ret void
}
define void @store_narrowing_v8i16_to_numeric_address(<8 x i8> %v, <8 x i8>* %p) {
; CHECK-LABEL: store_narrowing_v8i16_to_numeric_address:
; CHECK: .functype store_narrowing_v8i16_to_numeric_address (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: i32.const 16711935
; CHECK-NEXT: i32x4.splat
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.and
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.narrow_i16x8_u
; CHECK-NEXT: i64x2.extract_lane 0
; CHECK-NEXT: i64.store 32
; CHECK-NEXT: # fallthrough-return
%s = inttoptr i32 32 to <8 x i8>*
store <8 x i8> %v , <8 x i8>* %s
ret void
}
define void @store_v8i16_to_global_address(<8 x i16> %v) {
; CHECK-LABEL: store_v8i16_to_global_address:
; CHECK: .functype store_v8i16_to_global_address (v128) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store gv_v8i16
; CHECK-NEXT: # fallthrough-return
store <8 x i16> %v , <8 x i16>* @gv_v8i16
ret void
}
define void @store_narrowing_v8i16_to_global_address(<8 x i8> %v) {
; CHECK-LABEL: store_narrowing_v8i16_to_global_address:
; CHECK: .functype store_narrowing_v8i16_to_global_address (v128) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: i32.const 16711935
; CHECK-NEXT: i32x4.splat
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.and
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.narrow_i16x8_u
; CHECK-NEXT: i64x2.extract_lane 0
; CHECK-NEXT: i64.store gv_v8i8
; CHECK-NEXT: # fallthrough-return
store <8 x i8> %v , <8 x i8>* @gv_v8i8
ret void
}
; ==============================================================================
; 4 x i32
; ==============================================================================
define <4 x i32> @load_v4i32(<4 x i32>* %p) {
; CHECK-LABEL: load_v4i32:
; CHECK: .functype load_v4i32 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
%v = load <4 x i32>, <4 x i32>* %p
ret <4 x i32> %v
}
define <4 x i32> @load_splat_v4i32(i32* %addr) {
; CHECK-LABEL: load_splat_v4i32:
; CHECK: .functype load_splat_v4i32 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v32x4.load_splat 0
; CHECK-NEXT: # fallthrough-return
%e = load i32, i32* %addr, align 4
%v1 = insertelement <4 x i32> undef, i32 %e, i32 0
%v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
ret <4 x i32> %v2
}
define <4 x i32> @load_sext_v4i32(<4 x i16>* %p) {
; CHECK-LABEL: load_sext_v4i32:
; CHECK: .functype load_sext_v4i32 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32x4.load16x4_s 0
; CHECK-NEXT: # fallthrough-return
%v = load <4 x i16>, <4 x i16>* %p
%v2 = sext <4 x i16> %v to <4 x i32>
ret <4 x i32> %v2
}
define <4 x i32> @load_zext_v4i32(<4 x i16>* %p) {
; CHECK-LABEL: load_zext_v4i32:
; CHECK: .functype load_zext_v4i32 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32x4.load16x4_u 0
; CHECK-NEXT: # fallthrough-return
%v = load <4 x i16>, <4 x i16>* %p
%v2 = zext <4 x i16> %v to <4 x i32>
ret <4 x i32> %v2
}
define <4 x i16> @load_ext_v4i32(<4 x i16>* %p) {
; CHECK-LABEL: load_ext_v4i32:
; CHECK: .functype load_ext_v4i32 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32x4.load16x4_u 0
; CHECK-NEXT: # fallthrough-return
%v = load <4 x i16>, <4 x i16>* %p
ret <4 x i16> %v
}
define <4 x i32> @load_v4i32_with_folded_offset(<4 x i32>* %p) {
; CHECK-LABEL: load_v4i32_with_folded_offset:
; CHECK: .functype load_v4i32_with_folded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 16
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint <4 x i32>* %p to i32
%r = add nuw i32 %q, 16
%s = inttoptr i32 %r to <4 x i32>*
%v = load <4 x i32>, <4 x i32>* %s
ret <4 x i32> %v
}
define <4 x i32> @load_splat_v4i32_with_folded_offset(i32* %p) {
; CHECK-LABEL: load_splat_v4i32_with_folded_offset:
; CHECK: .functype load_splat_v4i32_with_folded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v32x4.load_splat 16
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint i32* %p to i32
%r = add nuw i32 %q, 16
%s = inttoptr i32 %r to i32*
%e = load i32, i32* %s
%v1 = insertelement <4 x i32> undef, i32 %e, i32 0
%v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
ret <4 x i32> %v2
}
define <4 x i32> @load_sext_v4i32_with_folded_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_sext_v4i32_with_folded_offset:
; CHECK: .functype load_sext_v4i32_with_folded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32x4.load16x4_s 16
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint <4 x i16>* %p to i32
%r = add nuw i32 %q, 16
%s = inttoptr i32 %r to <4 x i16>*
%v = load <4 x i16>, <4 x i16>* %s
%v2 = sext <4 x i16> %v to <4 x i32>
ret <4 x i32> %v2
}
define <4 x i32> @load_zext_v4i32_with_folded_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_zext_v4i32_with_folded_offset:
; CHECK: .functype load_zext_v4i32_with_folded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32x4.load16x4_u 16
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint <4 x i16>* %p to i32
%r = add nuw i32 %q, 16
%s = inttoptr i32 %r to <4 x i16>*
%v = load <4 x i16>, <4 x i16>* %s
%v2 = zext <4 x i16> %v to <4 x i32>
ret <4 x i32> %v2
}
define <4 x i16> @load_ext_v4i32_with_folded_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_ext_v4i32_with_folded_offset:
; CHECK: .functype load_ext_v4i32_with_folded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32x4.load16x4_u 16
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint <4 x i16>* %p to i32
%r = add nuw i32 %q, 16
%s = inttoptr i32 %r to <4 x i16>*
%v = load <4 x i16>, <4 x i16>* %s
ret <4 x i16> %v
}
define <4 x i32> @load_v4i32_with_folded_gep_offset(<4 x i32>* %p) {
; CHECK-LABEL: load_v4i32_with_folded_gep_offset:
; CHECK: .functype load_v4i32_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 16
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 1
%v = load <4 x i32>, <4 x i32>* %s
ret <4 x i32> %v
}
define <4 x i32> @load_splat_v4i32_with_folded_gep_offset(i32* %p) {
; CHECK-LABEL: load_splat_v4i32_with_folded_gep_offset:
; CHECK: .functype load_splat_v4i32_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v32x4.load_splat 4
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds i32, i32* %p, i32 1
%e = load i32, i32* %s
%v1 = insertelement <4 x i32> undef, i32 %e, i32 0
%v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
ret <4 x i32> %v2
}
define <4 x i32> @load_sext_v4i32_with_folded_gep_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_sext_v4i32_with_folded_gep_offset:
; CHECK: .functype load_sext_v4i32_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32x4.load16x4_s 8
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 1
%v = load <4 x i16>, <4 x i16>* %s
%v2 = sext <4 x i16> %v to <4 x i32>
ret <4 x i32> %v2
}
define <4 x i32> @load_zext_v4i32_with_folded_gep_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_zext_v4i32_with_folded_gep_offset:
; CHECK: .functype load_zext_v4i32_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32x4.load16x4_u 8
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 1
%v = load <4 x i16>, <4 x i16>* %s
%v2 = zext <4 x i16> %v to <4 x i32>
ret <4 x i32> %v2
}
define <4 x i16> @load_ext_v4i32_with_folded_gep_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_ext_v4i32_with_folded_gep_offset:
; CHECK: .functype load_ext_v4i32_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32x4.load16x4_u 8
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 1
%v = load <4 x i16>, <4 x i16>* %s
ret <4 x i16> %v
}
define <4 x i32> @load_v4i32_with_unfolded_gep_negative_offset(<4 x i32>* %p) {
; CHECK-LABEL: load_v4i32_with_unfolded_gep_negative_offset:
; CHECK: .functype load_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const -16
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 -1
%v = load <4 x i32>, <4 x i32>* %s
ret <4 x i32> %v
}
define <4 x i32> @load_splat_v4i32_with_unfolded_gep_negative_offset(i32* %p) {
; CHECK-LABEL: load_splat_v4i32_with_unfolded_gep_negative_offset:
; CHECK: .functype load_splat_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const -4
; CHECK-NEXT: i32.add
; CHECK-NEXT: v32x4.load_splat 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds i32, i32* %p, i32 -1
%e = load i32, i32* %s
%v1 = insertelement <4 x i32> undef, i32 %e, i32 0
%v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
ret <4 x i32> %v2
}
define <4 x i32> @load_sext_v4i32_with_unfolded_gep_negative_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_sext_v4i32_with_unfolded_gep_negative_offset:
; CHECK: .functype load_sext_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const -8
; CHECK-NEXT: i32.add
; CHECK-NEXT: i32x4.load16x4_s 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 -1
%v = load <4 x i16>, <4 x i16>* %s
%v2 = sext <4 x i16> %v to <4 x i32>
ret <4 x i32> %v2
}
define <4 x i32> @load_zext_v4i32_with_unfolded_gep_negative_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_zext_v4i32_with_unfolded_gep_negative_offset:
; CHECK: .functype load_zext_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const -8
; CHECK-NEXT: i32.add
; CHECK-NEXT: i32x4.load16x4_u 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 -1
%v = load <4 x i16>, <4 x i16>* %s
%v2 = zext <4 x i16> %v to <4 x i32>
ret <4 x i32> %v2
}
define <4 x i16> @load_ext_v4i32_with_unfolded_gep_negative_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_ext_v4i32_with_unfolded_gep_negative_offset:
; CHECK: .functype load_ext_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const -8
; CHECK-NEXT: i32.add
; CHECK-NEXT: i32x4.load16x4_u 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 -1
%v = load <4 x i16>, <4 x i16>* %s
ret <4 x i16> %v
}
define <4 x i32> @load_v4i32_with_unfolded_offset(<4 x i32>* %p) {
; CHECK-LABEL: load_v4i32_with_unfolded_offset:
; CHECK: .functype load_v4i32_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint <4 x i32>* %p to i32
%r = add nsw i32 %q, 16
%s = inttoptr i32 %r to <4 x i32>*
%v = load <4 x i32>, <4 x i32>* %s
ret <4 x i32> %v
}
define <4 x i32> @load_splat_v4i32_with_unfolded_offset(i32* %p) {
; CHECK-LABEL: load_splat_v4i32_with_unfolded_offset:
; CHECK: .functype load_splat_v4i32_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: v32x4.load_splat 0
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint i32* %p to i32
%r = add nsw i32 %q, 16
%s = inttoptr i32 %r to i32*
%e = load i32, i32* %s
%v1 = insertelement <4 x i32> undef, i32 %e, i32 0
%v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
ret <4 x i32> %v2
}
define <4 x i32> @load_sext_v4i32_with_unfolded_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_sext_v4i32_with_unfolded_offset:
; CHECK: .functype load_sext_v4i32_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: i32x4.load16x4_s 0
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint <4 x i16>* %p to i32
%r = add nsw i32 %q, 16
%s = inttoptr i32 %r to <4 x i16>*
%v = load <4 x i16>, <4 x i16>* %s
%v2 = sext <4 x i16> %v to <4 x i32>
ret <4 x i32> %v2
}
define <4 x i32> @load_zext_v4i32_with_unfolded_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_zext_v4i32_with_unfolded_offset:
; CHECK: .functype load_zext_v4i32_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: i32x4.load16x4_u 0
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint <4 x i16>* %p to i32
%r = add nsw i32 %q, 16
%s = inttoptr i32 %r to <4 x i16>*
%v = load <4 x i16>, <4 x i16>* %s
%v2 = zext <4 x i16> %v to <4 x i32>
ret <4 x i32> %v2
}
define <4 x i16> @load_ext_v4i32_with_unfolded_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_ext_v4i32_with_unfolded_offset:
; CHECK: .functype load_ext_v4i32_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: i32x4.load16x4_u 0
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint <4 x i16>* %p to i32
%r = add nsw i32 %q, 16
%s = inttoptr i32 %r to <4 x i16>*
%v = load <4 x i16>, <4 x i16>* %s
ret <4 x i16> %v
}
define <4 x i32> @load_v4i32_with_unfolded_gep_offset(<4 x i32>* %p) {
; CHECK-LABEL: load_v4i32_with_unfolded_gep_offset:
; CHECK: .functype load_v4i32_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr <4 x i32>, <4 x i32>* %p, i32 1
%v = load <4 x i32>, <4 x i32>* %s
ret <4 x i32> %v
}
define <4 x i32> @load_splat_v4i32_with_unfolded_gep_offset(i32* %p) {
; CHECK-LABEL: load_splat_v4i32_with_unfolded_gep_offset:
; CHECK: .functype load_splat_v4i32_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 4
; CHECK-NEXT: i32.add
; CHECK-NEXT: v32x4.load_splat 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr i32, i32* %p, i32 1
%e = load i32, i32* %s
%v1 = insertelement <4 x i32> undef, i32 %e, i32 0
%v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
ret <4 x i32> %v2
}
define <4 x i32> @load_sext_v4i32_with_unfolded_gep_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_sext_v4i32_with_unfolded_gep_offset:
; CHECK: .functype load_sext_v4i32_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 8
; CHECK-NEXT: i32.add
; CHECK-NEXT: i32x4.load16x4_s 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr <4 x i16>, <4 x i16>* %p, i32 1
%v = load <4 x i16>, <4 x i16>* %s
%v2 = sext <4 x i16> %v to <4 x i32>
ret <4 x i32> %v2
}
define <4 x i32> @load_zext_v4i32_with_unfolded_gep_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_zext_v4i32_with_unfolded_gep_offset:
; CHECK: .functype load_zext_v4i32_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 8
; CHECK-NEXT: i32.add
; CHECK-NEXT: i32x4.load16x4_u 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr <4 x i16>, <4 x i16>* %p, i32 1
%v = load <4 x i16>, <4 x i16>* %s
%v2 = zext <4 x i16> %v to <4 x i32>
ret <4 x i32> %v2
}
define <4 x i16> @load_ext_v4i32_with_unfolded_gep_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_ext_v4i32_with_unfolded_gep_offset:
; CHECK: .functype load_ext_v4i32_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 8
; CHECK-NEXT: i32.add
; CHECK-NEXT: i32x4.load16x4_u 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr <4 x i16>, <4 x i16>* %p, i32 1
%v = load <4 x i16>, <4 x i16>* %s
ret <4 x i16> %v
}
define <4 x i32> @load_v4i32_from_numeric_address() {
; CHECK-LABEL: load_v4i32_from_numeric_address:
; CHECK: .functype load_v4i32_from_numeric_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: v128.load 32
; CHECK-NEXT: # fallthrough-return
%s = inttoptr i32 32 to <4 x i32>*
%v = load <4 x i32>, <4 x i32>* %s
ret <4 x i32> %v
}
define <4 x i32> @load_splat_v4i32_from_numeric_address() {
; CHECK-LABEL: load_splat_v4i32_from_numeric_address:
; CHECK: .functype load_splat_v4i32_from_numeric_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: v32x4.load_splat 32
; CHECK-NEXT: # fallthrough-return
%s = inttoptr i32 32 to i32*
%e = load i32, i32* %s
%v1 = insertelement <4 x i32> undef, i32 %e, i32 0
%v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
ret <4 x i32> %v2
}
define <4 x i32> @load_sext_v4i32_from_numeric_address() {
; CHECK-LABEL: load_sext_v4i32_from_numeric_address:
; CHECK: .functype load_sext_v4i32_from_numeric_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: i32x4.load16x4_s 32
; CHECK-NEXT: # fallthrough-return
%s = inttoptr i32 32 to <4 x i16>*
%v = load <4 x i16>, <4 x i16>* %s
%v2 = sext <4 x i16> %v to <4 x i32>
ret <4 x i32> %v2
}
define <4 x i32> @load_zext_v4i32_from_numeric_address() {
; CHECK-LABEL: load_zext_v4i32_from_numeric_address:
; CHECK: .functype load_zext_v4i32_from_numeric_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: i32x4.load16x4_u 32
; CHECK-NEXT: # fallthrough-return
%s = inttoptr i32 32 to <4 x i16>*
%v = load <4 x i16>, <4 x i16>* %s
%v2 = zext <4 x i16> %v to <4 x i32>
ret <4 x i32> %v2
}
define <4 x i16> @load_ext_v4i32_from_numeric_address() {
; CHECK-LABEL: load_ext_v4i32_from_numeric_address:
; CHECK: .functype load_ext_v4i32_from_numeric_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: i32x4.load16x4_u 32
; CHECK-NEXT: # fallthrough-return
%s = inttoptr i32 32 to <4 x i16>*
%v = load <4 x i16>, <4 x i16>* %s
ret <4 x i16> %v
}
@gv_v4i32 = global <4 x i32> <i32 42, i32 42, i32 42, i32 42>
define <4 x i32> @load_v4i32_from_global_address() {
; CHECK-LABEL: load_v4i32_from_global_address:
; CHECK: .functype load_v4i32_from_global_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: v128.load gv_v4i32
; CHECK-NEXT: # fallthrough-return
%v = load <4 x i32>, <4 x i32>* @gv_v4i32
ret <4 x i32> %v
}
@gv_i32 = global i32 42
define <4 x i32> @load_splat_v4i32_from_global_address() {
; CHECK-LABEL: load_splat_v4i32_from_global_address:
; CHECK: .functype load_splat_v4i32_from_global_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: v32x4.load_splat gv_i32
; CHECK-NEXT: # fallthrough-return
%e = load i32, i32* @gv_i32
%v1 = insertelement <4 x i32> undef, i32 %e, i32 0
%v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
ret <4 x i32> %v2
}
@gv_v4i16 = global <4 x i16> <i16 42, i16 42, i16 42, i16 42>
define <4 x i32> @load_sext_v4i32_from_global_address() {
; CHECK-LABEL: load_sext_v4i32_from_global_address:
; CHECK: .functype load_sext_v4i32_from_global_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: i32x4.load16x4_s gv_v4i16
; CHECK-NEXT: # fallthrough-return
%v = load <4 x i16>, <4 x i16>* @gv_v4i16
%v2 = sext <4 x i16> %v to <4 x i32>
ret <4 x i32> %v2
}
define <4 x i32> @load_zext_v4i32_from_global_address() {
; CHECK-LABEL: load_zext_v4i32_from_global_address:
; CHECK: .functype load_zext_v4i32_from_global_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: i32x4.load16x4_u gv_v4i16
; CHECK-NEXT: # fallthrough-return
%v = load <4 x i16>, <4 x i16>* @gv_v4i16
%v2 = zext <4 x i16> %v to <4 x i32>
ret <4 x i32> %v2
}
define <4 x i16> @load_ext_v4i32_from_global_address() {
; CHECK-LABEL: load_ext_v4i32_from_global_address:
; CHECK: .functype load_ext_v4i32_from_global_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: i32x4.load16x4_u gv_v4i16
; CHECK-NEXT: # fallthrough-return
%v = load <4 x i16>, <4 x i16>* @gv_v4i16
ret <4 x i16> %v
}
define void @store_v4i32(<4 x i32> %v, <4 x i32>* %p) {
; CHECK-LABEL: store_v4i32:
; CHECK: .functype store_v4i32 (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
store <4 x i32> %v, <4 x i32>* %p
ret void
}
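; The store_narrowing tests below cover the lowering of truncating vector
; stores. As the CHECK lines show, each i32 lane is masked with 65535 to
; truncate it manually (i16x8.narrow_i32x4_u saturates rather than
; truncates), the masked value is narrowed with the original vector as the
; unused second operand, and the low 64 bits of the result are stored; per
; lane this is roughly stored = (i16)(lane & 0xffff).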
define void @store_narrowing_v4i32(<4 x i16> %v, <4 x i16>* %p) {
; CHECK-LABEL: store_narrowing_v4i32:
; CHECK: .functype store_narrowing_v4i32 (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const 65535
; CHECK-NEXT: i32x4.splat
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.and
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i16x8.narrow_i32x4_u
; CHECK-NEXT: i64x2.extract_lane 0
; CHECK-NEXT: i64.store 0
; CHECK-NEXT: # fallthrough-return
store <4 x i16> %v, <4 x i16>* %p
ret void
}
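; In the _folded_ tests below the address computation folds into the
; constant offset field of the access; in the _unfolded_ tests it does not.
; Wasm memory offsets are unsigned, so only `add nuw` and inbounds GEPs with
; non-negative indices are expected to fold.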
define void @store_v4i32_with_folded_offset(<4 x i32> %v, <4 x i32>* %p) {
; CHECK-LABEL: store_v4i32_with_folded_offset:
; CHECK: .functype store_v4i32_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 16
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint <4 x i32>* %p to i32
%r = add nuw i32 %q, 16
%s = inttoptr i32 %r to <4 x i32>*
store <4 x i32> %v, <4 x i32>* %s
ret void
}
define void @store_narrowing_v4i32_with_folded_offset(<4 x i16> %v, <4 x i16>* %p) {
; CHECK-LABEL: store_narrowing_v4i32_with_folded_offset:
; CHECK: .functype store_narrowing_v4i32_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const 65535
; CHECK-NEXT: i32x4.splat
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.and
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i16x8.narrow_i32x4_u
; CHECK-NEXT: i64x2.extract_lane 0
; CHECK-NEXT: i64.store 16
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint <4 x i16>* %p to i32
%r = add nuw i32 %q, 16
%s = inttoptr i32 %r to <4 x i16>*
store <4 x i16> %v, <4 x i16>* %s
ret void
}
define void @store_v4i32_with_folded_gep_offset(<4 x i32> %v, <4 x i32>* %p) {
; CHECK-LABEL: store_v4i32_with_folded_gep_offset:
; CHECK: .functype store_v4i32_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 16
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 1
store <4 x i32> %v, <4 x i32>* %s
ret void
}
define void @store_narrowing_v4i32_with_folded_gep_offset(<4 x i16> %v, <4 x i16>* %p) {
; CHECK-LABEL: store_narrowing_v4i32_with_folded_gep_offset:
; CHECK: .functype store_narrowing_v4i32_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const 65535
; CHECK-NEXT: i32x4.splat
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.and
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i16x8.narrow_i32x4_u
; CHECK-NEXT: i64x2.extract_lane 0
; CHECK-NEXT: i64.store 8
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 1
store <4 x i16> %v, <4 x i16>* %s
ret void
}
define void @store_v4i32_with_unfolded_gep_negative_offset(<4 x i32> %v, <4 x i32>* %p) {
; CHECK-LABEL: store_v4i32_with_unfolded_gep_negative_offset:
; CHECK: .functype store_v4i32_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const -16
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 -1
store <4 x i32> %v, <4 x i32>* %s
ret void
}
define void @store_narrowing_v4i32_with_unfolded_gep_negative_offset(<4 x i16> %v, <4 x i16>* %p) {
; CHECK-LABEL: store_narrowing_v4i32_with_unfolded_gep_negative_offset:
; CHECK: .functype store_narrowing_v4i32_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const -8
; CHECK-NEXT: i32.add
; CHECK-NEXT: i32.const 65535
; CHECK-NEXT: i32x4.splat
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.and
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i16x8.narrow_i32x4_u
; CHECK-NEXT: i64x2.extract_lane 0
; CHECK-NEXT: i64.store 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 -1
store <4 x i16> %v, <4 x i16>* %s
ret void
}
define void @store_v4i32_with_unfolded_offset(<4 x i32> %v, <4 x i32>* %p) {
; CHECK-LABEL: store_v4i32_with_unfolded_offset:
; CHECK: .functype store_v4i32_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint <4 x i32>* %p to i32
%r = add nsw i32 %q, 16
%s = inttoptr i32 %r to <4 x i32>*
store <4 x i32> %v, <4 x i32>* %s
ret void
}
define void @store_narrowing_v4i32_with_unfolded_offset(<4 x i16> %v, <4 x i16>* %p) {
; CHECK-LABEL: store_narrowing_v4i32_with_unfolded_offset:
; CHECK: .functype store_narrowing_v4i32_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: i32.const 65535
; CHECK-NEXT: i32x4.splat
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.and
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i16x8.narrow_i32x4_u
; CHECK-NEXT: i64x2.extract_lane 0
; CHECK-NEXT: i64.store 0
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint <4 x i16>* %p to i32
%r = add nsw i32 %q, 16
%s = inttoptr i32 %r to <4 x i16>*
store <4 x i16> %v, <4 x i16>* %s
ret void
}
define void @store_v4i32_with_unfolded_gep_offset(<4 x i32> %v, <4 x i32>* %p) {
; CHECK-LABEL: store_v4i32_with_unfolded_gep_offset:
; CHECK: .functype store_v4i32_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr <4 x i32>, <4 x i32>* %p, i32 1
store <4 x i32> %v, <4 x i32>* %s
ret void
}
define void @store_narrowing_v4i32_with_unfolded_gep_offset(<4 x i16> %v, <4 x i16>* %p) {
; CHECK-LABEL: store_narrowing_v4i32_with_unfolded_gep_offset:
; CHECK: .functype store_narrowing_v4i32_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const 8
; CHECK-NEXT: i32.add
; CHECK-NEXT: i32.const 65535
; CHECK-NEXT: i32x4.splat
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.and
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i16x8.narrow_i32x4_u
; CHECK-NEXT: i64x2.extract_lane 0
; CHECK-NEXT: i64.store 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr <4 x i16>, <4 x i16>* %p, i32 1
store <4 x i16> %v, <4 x i16>* %s
ret void
}
define void @store_v4i32_to_numeric_address(<4 x i32> %v) {
; CHECK-LABEL: store_v4i32_to_numeric_address:
; CHECK: .functype store_v4i32_to_numeric_address (v128) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 32
; CHECK-NEXT: # fallthrough-return
%s = inttoptr i32 32 to <4 x i32>*
store <4 x i32> %v, <4 x i32>* %s
ret void
}
define void @store_narrowing_v4i32_to_numeric_address(<4 x i16> %v) {
; CHECK-LABEL: store_narrowing_v4i32_to_numeric_address:
; CHECK: .functype store_narrowing_v4i32_to_numeric_address (v128) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: i32.const 65535
; CHECK-NEXT: i32x4.splat
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.and
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i16x8.narrow_i32x4_u
; CHECK-NEXT: i64x2.extract_lane 0
; CHECK-NEXT: i64.store 32
; CHECK-NEXT: # fallthrough-return
%s = inttoptr i32 32 to <4 x i16>*
store <4 x i16> %v, <4 x i16>* %s
ret void
}
define void @store_v4i32_to_global_address(<4 x i32> %v) {
; CHECK-LABEL: store_v4i32_to_global_address:
; CHECK: .functype store_v4i32_to_global_address (v128) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store gv_v4i32
; CHECK-NEXT: # fallthrough-return
store <4 x i32> %v, <4 x i32>* @gv_v4i32
ret void
}
define void @store_narrowing_v4i32_to_global_address(<4 x i16> %v) {
; CHECK-LABEL: store_narrowing_v4i32_to_global_address:
; CHECK: .functype store_narrowing_v4i32_to_global_address (v128) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: i32.const 65535
; CHECK-NEXT: i32x4.splat
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.and
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i16x8.narrow_i32x4_u
; CHECK-NEXT: i64x2.extract_lane 0
; CHECK-NEXT: i64.store gv_v4i16
; CHECK-NEXT: # fallthrough-return
store <4 x i16> %v, <4 x i16>* @gv_v4i16
ret void
}
; ==============================================================================
; 2 x i64
; ==============================================================================
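; Note that there are no narrowing store tests in this section: the SIMD
; proposal only provides narrowing instructions for 8- and 16-bit lanes
; (i8x16.narrow_i16x8_* and i16x8.narrow_i32x4_*), so truncating stores of
; <2 x i64> have no narrowing-based lowering.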
define <2 x i64> @load_v2i64(<2 x i64>* %p) {
; CHECK-LABEL: load_v2i64:
; CHECK: .functype load_v2i64 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
%v = load <2 x i64>, <2 x i64>* %p
ret <2 x i64> %v
}
define <2 x i64> @load_splat_v2i64(i64* %p) {
; CHECK-LABEL: load_splat_v2i64:
; CHECK: .functype load_splat_v2i64 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v64x2.load_splat 0
; CHECK-NEXT: # fallthrough-return
%e = load i64, i64* %p
%v1 = insertelement <2 x i64> undef, i64 %e, i32 0
%v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
ret <2 x i64> %v2
}
define <2 x i64> @load_sext_v2i64(<2 x i32>* %p) {
; CHECK-LABEL: load_sext_v2i64:
; CHECK: .functype load_sext_v2i64 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i64x2.load32x2_s 0
; CHECK-NEXT: # fallthrough-return
%v = load <2 x i32>, <2 x i32>* %p
%v2 = sext <2 x i32> %v to <2 x i64>
ret <2 x i64> %v2
}
define <2 x i64> @load_zext_v2i64(<2 x i32>* %p) {
; CHECK-LABEL: load_zext_v2i64:
; CHECK: .functype load_zext_v2i64 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i64x2.load32x2_u 0
; CHECK-NEXT: # fallthrough-return
%v = load <2 x i32>, <2 x i32>* %p
%v2 = zext <2 x i32> %v to <2 x i64>
ret <2 x i64> %v2
}
define <2 x i32> @load_ext_v2i64(<2 x i32>* %p) {
; CHECK-LABEL: load_ext_v2i64:
; CHECK: .functype load_ext_v2i64 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i64x2.load32x2_u 0
; CHECK-NEXT: # fallthrough-return
%v = load <2 x i32>, <2 x i32>* %p
ret <2 x i32> %v
}
define <2 x i64> @load_v2i64_with_folded_offset(<2 x i64>* %p) {
; CHECK-LABEL: load_v2i64_with_folded_offset:
; CHECK: .functype load_v2i64_with_folded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 16
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint <2 x i64>* %p to i32
%r = add nuw i32 %q, 16
%s = inttoptr i32 %r to <2 x i64>*
%v = load <2 x i64>, <2 x i64>* %s
ret <2 x i64> %v
}
define <2 x i64> @load_splat_v2i64_with_folded_offset(i64* %p) {
; CHECK-LABEL: load_splat_v2i64_with_folded_offset:
; CHECK: .functype load_splat_v2i64_with_folded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v64x2.load_splat 16
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint i64* %p to i32
%r = add nuw i32 %q, 16
%s = inttoptr i32 %r to i64*
%e = load i64, i64* %s
%v1 = insertelement <2 x i64> undef, i64 %e, i32 0
%v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
ret <2 x i64> %v2
}
define <2 x i64> @load_sext_v2i64_with_folded_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_sext_v2i64_with_folded_offset:
; CHECK: .functype load_sext_v2i64_with_folded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i64x2.load32x2_s 16
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint <2 x i32>* %p to i32
%r = add nuw i32 %q, 16
%s = inttoptr i32 %r to <2 x i32>*
%v = load <2 x i32>, <2 x i32>* %s
%v2 = sext <2 x i32> %v to <2 x i64>
ret <2 x i64> %v2
}
define <2 x i64> @load_zext_v2i64_with_folded_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_zext_v2i64_with_folded_offset:
; CHECK: .functype load_zext_v2i64_with_folded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i64x2.load32x2_u 16
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint <2 x i32>* %p to i32
%r = add nuw i32 %q, 16
%s = inttoptr i32 %r to <2 x i32>*
%v = load <2 x i32>, <2 x i32>* %s
%v2 = zext <2 x i32> %v to <2 x i64>
ret <2 x i64> %v2
}
define <2 x i32> @load_ext_v2i64_with_folded_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_ext_v2i64_with_folded_offset:
; CHECK: .functype load_ext_v2i64_with_folded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i64x2.load32x2_u 16
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint <2 x i32>* %p to i32
%r = add nuw i32 %q, 16
%s = inttoptr i32 %r to <2 x i32>*
%v = load <2 x i32>, <2 x i32>* %s
ret <2 x i32> %v
}
define <2 x i64> @load_v2i64_with_folded_gep_offset(<2 x i64>* %p) {
; CHECK-LABEL: load_v2i64_with_folded_gep_offset:
; CHECK: .functype load_v2i64_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 16
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 1
%v = load <2 x i64>, <2 x i64>* %s
ret <2 x i64> %v
}
define <2 x i64> @load_splat_v2i64_with_folded_gep_offset(i64* %p) {
; CHECK-LABEL: load_splat_v2i64_with_folded_gep_offset:
; CHECK: .functype load_splat_v2i64_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v64x2.load_splat 8
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds i64, i64* %p, i32 1
%e = load i64, i64* %s
%v1 = insertelement <2 x i64> undef, i64 %e, i32 0
%v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
ret <2 x i64> %v2
}
define <2 x i64> @load_sext_v2i64_with_folded_gep_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_sext_v2i64_with_folded_gep_offset:
; CHECK: .functype load_sext_v2i64_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i64x2.load32x2_s 8
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 1
%v = load <2 x i32>, <2 x i32>* %s
%v2 = sext <2 x i32> %v to <2 x i64>
ret <2 x i64> %v2
}
define <2 x i64> @load_zext_v2i64_with_folded_gep_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_zext_v2i64_with_folded_gep_offset:
; CHECK: .functype load_zext_v2i64_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i64x2.load32x2_u 8
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 1
%v = load <2 x i32>, <2 x i32>* %s
%v2 = zext <2 x i32> %v to <2 x i64>
ret <2 x i64> %v2
}
define <2 x i32> @load_ext_v2i64_with_folded_gep_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_ext_v2i64_with_folded_gep_offset:
; CHECK: .functype load_ext_v2i64_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i64x2.load32x2_u 8
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 1
%v = load <2 x i32>, <2 x i32>* %s
ret <2 x i32> %v
}
define <2 x i64> @load_v2i64_with_unfolded_gep_negative_offset(<2 x i64>* %p) {
; CHECK-LABEL: load_v2i64_with_unfolded_gep_negative_offset:
; CHECK: .functype load_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const -16
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 -1
%v = load <2 x i64>, <2 x i64>* %s
ret <2 x i64> %v
}
define <2 x i64> @load_splat_v2i64_with_unfolded_gep_negative_offset(i64* %p) {
; CHECK-LABEL: load_splat_v2i64_with_unfolded_gep_negative_offset:
; CHECK: .functype load_splat_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const -8
; CHECK-NEXT: i32.add
; CHECK-NEXT: v64x2.load_splat 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds i64, i64* %p, i32 -1
%e = load i64, i64* %s
%v1 = insertelement <2 x i64> undef, i64 %e, i32 0
%v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
ret <2 x i64> %v2
}
define <2 x i64> @load_sext_v2i64_with_unfolded_gep_negative_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_sext_v2i64_with_unfolded_gep_negative_offset:
; CHECK: .functype load_sext_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const -8
; CHECK-NEXT: i32.add
; CHECK-NEXT: i64x2.load32x2_s 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 -1
%v = load <2 x i32>, <2 x i32>* %s
%v2 = sext <2 x i32> %v to <2 x i64>
ret <2 x i64> %v2
}
define <2 x i64> @load_zext_v2i64_with_unfolded_gep_negative_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_zext_v2i64_with_unfolded_gep_negative_offset:
; CHECK: .functype load_zext_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const -8
; CHECK-NEXT: i32.add
; CHECK-NEXT: i64x2.load32x2_u 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 -1
%v = load <2 x i32>, <2 x i32>* %s
%v2 = zext <2 x i32> %v to <2 x i64>
ret <2 x i64> %v2
}
define <2 x i32> @load_ext_v2i64_with_unfolded_gep_negative_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_ext_v2i64_with_unfolded_gep_negative_offset:
; CHECK: .functype load_ext_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const -8
; CHECK-NEXT: i32.add
; CHECK-NEXT: i64x2.load32x2_u 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 -1
%v = load <2 x i32>, <2 x i32>* %s
ret <2 x i32> %v
}
define <2 x i64> @load_v2i64_with_unfolded_offset(<2 x i64>* %p) {
; CHECK-LABEL: load_v2i64_with_unfolded_offset:
; CHECK: .functype load_v2i64_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint <2 x i64>* %p to i32
%r = add nsw i32 %q, 16
%s = inttoptr i32 %r to <2 x i64>*
%v = load <2 x i64>, <2 x i64>* %s
ret <2 x i64> %v
}
define <2 x i64> @load_splat_v2i64_with_unfolded_offset(i64* %p) {
; CHECK-LABEL: load_splat_v2i64_with_unfolded_offset:
; CHECK: .functype load_splat_v2i64_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: v64x2.load_splat 0
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint i64* %p to i32
%r = add nsw i32 %q, 16
%s = inttoptr i32 %r to i64*
%e = load i64, i64* %s
%v1 = insertelement <2 x i64> undef, i64 %e, i32 0
%v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
ret <2 x i64> %v2
}
define <2 x i64> @load_sext_v2i64_with_unfolded_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_sext_v2i64_with_unfolded_offset:
; CHECK: .functype load_sext_v2i64_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: i64x2.load32x2_s 0
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint <2 x i32>* %p to i32
%r = add nsw i32 %q, 16
%s = inttoptr i32 %r to <2 x i32>*
%v = load <2 x i32>, <2 x i32>* %s
%v2 = sext <2 x i32> %v to <2 x i64>
ret <2 x i64> %v2
}
define <2 x i64> @load_zext_v2i64_with_unfolded_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_zext_v2i64_with_unfolded_offset:
; CHECK: .functype load_zext_v2i64_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: i64x2.load32x2_u 0
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint <2 x i32>* %p to i32
%r = add nsw i32 %q, 16
%s = inttoptr i32 %r to <2 x i32>*
%v = load <2 x i32>, <2 x i32>* %s
%v2 = zext <2 x i32> %v to <2 x i64>
ret <2 x i64> %v2
}
define <2 x i32> @load_ext_v2i64_with_unfolded_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_ext_v2i64_with_unfolded_offset:
; CHECK: .functype load_ext_v2i64_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: i64x2.load32x2_u 0
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint <2 x i32>* %p to i32
%r = add nsw i32 %q, 16
%s = inttoptr i32 %r to <2 x i32>*
%v = load <2 x i32>, <2 x i32>* %s
ret <2 x i32> %v
}
define <2 x i64> @load_v2i64_with_unfolded_gep_offset(<2 x i64>* %p) {
; CHECK-LABEL: load_v2i64_with_unfolded_gep_offset:
; CHECK: .functype load_v2i64_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr <2 x i64>, <2 x i64>* %p, i32 1
%v = load <2 x i64>, <2 x i64>* %s
ret <2 x i64> %v
}
define <2 x i64> @load_splat_v2i64_with_unfolded_gep_offset(i64* %p) {
; CHECK-LABEL: load_splat_v2i64_with_unfolded_gep_offset:
; CHECK: .functype load_splat_v2i64_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 8
; CHECK-NEXT: i32.add
; CHECK-NEXT: v64x2.load_splat 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr i64, i64* %p, i32 1
%e = load i64, i64* %s
%v1 = insertelement <2 x i64> undef, i64 %e, i32 0
%v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
ret <2 x i64> %v2
}
define <2 x i64> @load_sext_v2i64_with_unfolded_gep_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_sext_v2i64_with_unfolded_gep_offset:
; CHECK: .functype load_sext_v2i64_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 8
; CHECK-NEXT: i32.add
; CHECK-NEXT: i64x2.load32x2_s 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr <2 x i32>, <2 x i32>* %p, i32 1
%v = load <2 x i32>, <2 x i32>* %s
%v2 = sext <2 x i32> %v to <2 x i64>
ret <2 x i64> %v2
}
define <2 x i64> @load_zext_v2i64_with_unfolded_gep_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_zext_v2i64_with_unfolded_gep_offset:
; CHECK: .functype load_zext_v2i64_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 8
; CHECK-NEXT: i32.add
; CHECK-NEXT: i64x2.load32x2_u 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr <2 x i32>, <2 x i32>* %p, i32 1
%v = load <2 x i32>, <2 x i32>* %s
%v2 = zext <2 x i32> %v to <2 x i64>
ret <2 x i64> %v2
}
define <2 x i32> @load_ext_v2i64_with_unfolded_gep_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_ext_v2i64_with_unfolded_gep_offset:
; CHECK: .functype load_ext_v2i64_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 8
; CHECK-NEXT: i32.add
; CHECK-NEXT: i64x2.load32x2_u 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr <2 x i32>, <2 x i32>* %p, i32 1
%v = load <2 x i32>, <2 x i32>* %s
ret <2 x i32> %v
}
define <2 x i64> @load_v2i64_from_numeric_address() {
; CHECK-LABEL: load_v2i64_from_numeric_address:
; CHECK: .functype load_v2i64_from_numeric_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: v128.load 32
; CHECK-NEXT: # fallthrough-return
%s = inttoptr i32 32 to <2 x i64>*
%v = load <2 x i64>, <2 x i64>* %s
ret <2 x i64> %v
}
define <2 x i64> @load_splat_v2i64_from_numeric_address() {
; CHECK-LABEL: load_splat_v2i64_from_numeric_address:
; CHECK: .functype load_splat_v2i64_from_numeric_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: v64x2.load_splat 32
; CHECK-NEXT: # fallthrough-return
%s = inttoptr i32 32 to i64*
%e = load i64, i64* %s
%v1 = insertelement <2 x i64> undef, i64 %e, i32 0
%v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
ret <2 x i64> %v2
}
define <2 x i64> @load_sext_v2i64_from_numeric_address() {
; CHECK-LABEL: load_sext_v2i64_from_numeric_address:
; CHECK: .functype load_sext_v2i64_from_numeric_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: i64x2.load32x2_s 32
; CHECK-NEXT: # fallthrough-return
%s = inttoptr i32 32 to <2 x i32>*
%v = load <2 x i32>, <2 x i32>* %s
%v2 = sext <2 x i32> %v to <2 x i64>
ret <2 x i64> %v2
}
define <2 x i64> @load_zext_v2i64_from_numeric_address() {
; CHECK-LABEL: load_zext_v2i64_from_numeric_address:
; CHECK: .functype load_zext_v2i64_from_numeric_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: i64x2.load32x2_u 32
; CHECK-NEXT: # fallthrough-return
%s = inttoptr i32 32 to <2 x i32>*
%v = load <2 x i32>, <2 x i32>* %s
%v2 = zext <2 x i32> %v to <2 x i64>
ret <2 x i64> %v2
}
define <2 x i32> @load_ext_v2i64_from_numeric_address() {
; CHECK-LABEL: load_ext_v2i64_from_numeric_address:
; CHECK: .functype load_ext_v2i64_from_numeric_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: i64x2.load32x2_u 32
; CHECK-NEXT: # fallthrough-return
%s = inttoptr i32 32 to <2 x i32>*
%v = load <2 x i32>, <2 x i32>* %s
ret <2 x i32> %v
}
@gv_v2i64 = global <2 x i64> <i64 42, i64 42>
define <2 x i64> @load_v2i64_from_global_address() {
; CHECK-LABEL: load_v2i64_from_global_address:
; CHECK: .functype load_v2i64_from_global_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: v128.load gv_v2i64
; CHECK-NEXT: # fallthrough-return
%v = load <2 x i64>, <2 x i64>* @gv_v2i64
ret <2 x i64> %v
}
@gv_i64 = global i64 42
define <2 x i64> @load_splat_v2i64_from_global_address() {
; CHECK-LABEL: load_splat_v2i64_from_global_address:
; CHECK: .functype load_splat_v2i64_from_global_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: v64x2.load_splat gv_i64
; CHECK-NEXT: # fallthrough-return
%e = load i64, i64* @gv_i64
%v1 = insertelement <2 x i64> undef, i64 %e, i32 0
%v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
ret <2 x i64> %v2
}
@gv_v2i32 = global <2 x i32> <i32 42, i32 42>
define <2 x i64> @load_sext_v2i64_from_global_address() {
; CHECK-LABEL: load_sext_v2i64_from_global_address:
; CHECK: .functype load_sext_v2i64_from_global_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: i64x2.load32x2_s gv_v2i32
; CHECK-NEXT: # fallthrough-return
%v = load <2 x i32>, <2 x i32>* @gv_v2i32
%v2 = sext <2 x i32> %v to <2 x i64>
ret <2 x i64> %v2
}
define <2 x i64> @load_zext_v2i64_from_global_address() {
; CHECK-LABEL: load_zext_v2i64_from_global_address:
; CHECK: .functype load_zext_v2i64_from_global_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: i64x2.load32x2_u gv_v2i32
; CHECK-NEXT: # fallthrough-return
%v = load <2 x i32>, <2 x i32>* @gv_v2i32
%v2 = zext <2 x i32> %v to <2 x i64>
ret <2 x i64> %v2
}
define <2 x i32> @load_ext_v2i64_from_global_address() {
; CHECK-LABEL: load_ext_v2i64_from_global_address:
; CHECK: .functype load_ext_v2i64_from_global_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: i64x2.load32x2_u gv_v2i32
; CHECK-NEXT: # fallthrough-return
%v = load <2 x i32>, <2 x i32>* @gv_v2i32
ret <2 x i32> %v
}
define void @store_v2i64(<2 x i64> %v, <2 x i64>* %p) {
; CHECK-LABEL: store_v2i64:
; CHECK: .functype store_v2i64 (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
store <2 x i64> %v, <2 x i64>* %p
ret void
}
define void @store_v2i64_with_folded_offset(<2 x i64> %v, <2 x i64>* %p) {
; CHECK-LABEL: store_v2i64_with_folded_offset:
; CHECK: .functype store_v2i64_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 16
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint <2 x i64>* %p to i32
%r = add nuw i32 %q, 16
%s = inttoptr i32 %r to <2 x i64>*
store <2 x i64> %v, <2 x i64>* %s
ret void
}
define void @store_v2i64_with_folded_gep_offset(<2 x i64> %v, <2 x i64>* %p) {
; CHECK-LABEL: store_v2i64_with_folded_gep_offset:
; CHECK: .functype store_v2i64_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 16
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 1
store <2 x i64> %v, <2 x i64>* %s
ret void
}
define void @store_v2i64_with_unfolded_gep_negative_offset(<2 x i64> %v, <2 x i64>* %p) {
; CHECK-LABEL: store_v2i64_with_unfolded_gep_negative_offset:
; CHECK: .functype store_v2i64_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const -16
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 -1
store <2 x i64> %v, <2 x i64>* %s
ret void
}
define void @store_v2i64_with_unfolded_offset(<2 x i64> %v, <2 x i64>* %p) {
; CHECK-LABEL: store_v2i64_with_unfolded_offset:
; CHECK: .functype store_v2i64_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint <2 x i64>* %p to i32
%r = add nsw i32 %q, 16
%s = inttoptr i32 %r to <2 x i64>*
store <2 x i64> %v, <2 x i64>* %s
ret void
}
define void @store_v2i64_with_unfolded_gep_offset(<2 x i64> %v, <2 x i64>* %p) {
; CHECK-LABEL: store_v2i64_with_unfolded_gep_offset:
; CHECK: .functype store_v2i64_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr <2 x i64>, <2 x i64>* %p, i32 1
store <2 x i64> %v, <2 x i64>* %s
ret void
}
define void @store_v2i64_to_numeric_address(<2 x i64> %v) {
; CHECK-LABEL: store_v2i64_to_numeric_address:
; CHECK: .functype store_v2i64_to_numeric_address (v128) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 32
; CHECK-NEXT: # fallthrough-return
%s = inttoptr i32 32 to <2 x i64>*
store <2 x i64> %v, <2 x i64>* %s
ret void
}
define void @store_v2i64_to_global_address(<2 x i64> %v) {
; CHECK-LABEL: store_v2i64_to_global_address:
; CHECK: .functype store_v2i64_to_global_address (v128) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store gv_v2i64
; CHECK-NEXT: # fallthrough-return
store <2 x i64> %v, <2 x i64>* @gv_v2i64
ret void
}
; ==============================================================================
; 4 x float
; ==============================================================================
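; The floating-point sections (4 x float here, 2 x double below) only test
; plain and splat accesses: the extending loads and narrowing stores
; exercised above are integer-only.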
define <4 x float> @load_v4f32(<4 x float>* %p) {
; CHECK-LABEL: load_v4f32:
; CHECK: .functype load_v4f32 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
%v = load <4 x float>, <4 x float>* %p
ret <4 x float> %v
}
define <4 x float> @load_splat_v4f32(float* %p) {
; CHECK-LABEL: load_splat_v4f32:
; CHECK: .functype load_splat_v4f32 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v32x4.load_splat 0
; CHECK-NEXT: # fallthrough-return
%e = load float, float* %p
%v1 = insertelement <4 x float> undef, float %e, i32 0
%v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
ret <4 x float> %v2
}
define <4 x float> @load_v4f32_with_folded_offset(<4 x float>* %p) {
; CHECK-LABEL: load_v4f32_with_folded_offset:
; CHECK: .functype load_v4f32_with_folded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 16
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint <4 x float>* %p to i32
%r = add nuw i32 %q, 16
%s = inttoptr i32 %r to <4 x float>*
%v = load <4 x float>, <4 x float>* %s
ret <4 x float> %v
}
define <4 x float> @load_splat_v4f32_with_folded_offset(float* %p) {
; CHECK-LABEL: load_splat_v4f32_with_folded_offset:
; CHECK: .functype load_splat_v4f32_with_folded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v32x4.load_splat 16
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint float* %p to i32
%r = add nuw i32 %q, 16
%s = inttoptr i32 %r to float*
%e = load float, float* %s
%v1 = insertelement <4 x float> undef, float %e, i32 0
%v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
ret <4 x float> %v2
}
define <4 x float> @load_v4f32_with_folded_gep_offset(<4 x float>* %p) {
; CHECK-LABEL: load_v4f32_with_folded_gep_offset:
; CHECK: .functype load_v4f32_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 16
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 1
%v = load <4 x float>, <4 x float>* %s
ret <4 x float> %v
}
define <4 x float> @load_splat_v4f32_with_folded_gep_offset(float* %p) {
; CHECK-LABEL: load_splat_v4f32_with_folded_gep_offset:
; CHECK: .functype load_splat_v4f32_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v32x4.load_splat 4
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds float, float* %p, i32 1
%e = load float, float* %s
%v1 = insertelement <4 x float> undef, float %e, i32 0
%v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
ret <4 x float> %v2
}
define <4 x float> @load_v4f32_with_unfolded_gep_negative_offset(<4 x float>* %p) {
; CHECK-LABEL: load_v4f32_with_unfolded_gep_negative_offset:
; CHECK: .functype load_v4f32_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const -16
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 -1
%v = load <4 x float>, <4 x float>* %s
ret <4 x float> %v
}
define <4 x float> @load_splat_v4f32_with_unfolded_gep_negative_offset(float* %p) {
; CHECK-LABEL: load_splat_v4f32_with_unfolded_gep_negative_offset:
; CHECK: .functype load_splat_v4f32_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const -4
; CHECK-NEXT: i32.add
; CHECK-NEXT: v32x4.load_splat 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds float, float* %p, i32 -1
%e = load float, float* %s
%v1 = insertelement <4 x float> undef, float %e, i32 0
%v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
ret <4 x float> %v2
}
define <4 x float> @load_v4f32_with_unfolded_offset(<4 x float>* %p) {
; CHECK-LABEL: load_v4f32_with_unfolded_offset:
; CHECK: .functype load_v4f32_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint <4 x float>* %p to i32
%r = add nsw i32 %q, 16
%s = inttoptr i32 %r to <4 x float>*
%v = load <4 x float>, <4 x float>* %s
ret <4 x float> %v
}
define <4 x float> @load_splat_v4f32_with_unfolded_offset(float* %p) {
; CHECK-LABEL: load_splat_v4f32_with_unfolded_offset:
; CHECK: .functype load_splat_v4f32_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: v32x4.load_splat 0
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint float* %p to i32
%r = add nsw i32 %q, 16
%s = inttoptr i32 %r to float*
%e = load float, float* %s
%v1 = insertelement <4 x float> undef, float %e, i32 0
%v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
ret <4 x float> %v2
}
define <4 x float> @load_v4f32_with_unfolded_gep_offset(<4 x float>* %p) {
; CHECK-LABEL: load_v4f32_with_unfolded_gep_offset:
; CHECK: .functype load_v4f32_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr <4 x float>, <4 x float>* %p, i32 1
%v = load <4 x float>, <4 x float>* %s
ret <4 x float> %v
}
define <4 x float> @load_splat_v4f32_with_unfolded_gep_offset(float* %p) {
; CHECK-LABEL: load_splat_v4f32_with_unfolded_gep_offset:
; CHECK: .functype load_splat_v4f32_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 4
; CHECK-NEXT: i32.add
; CHECK-NEXT: v32x4.load_splat 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr float, float* %p, i32 1
%e = load float, float* %s
%v1 = insertelement <4 x float> undef, float %e, i32 0
%v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
ret <4 x float> %v2
}
define <4 x float> @load_v4f32_from_numeric_address() {
; CHECK-LABEL: load_v4f32_from_numeric_address:
; CHECK: .functype load_v4f32_from_numeric_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: v128.load 32
; CHECK-NEXT: # fallthrough-return
%s = inttoptr i32 32 to <4 x float>*
%v = load <4 x float>, <4 x float>* %s
ret <4 x float> %v
}
define <4 x float> @load_splat_v4f32_from_numeric_address() {
; CHECK-LABEL: load_splat_v4f32_from_numeric_address:
; CHECK: .functype load_splat_v4f32_from_numeric_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: v32x4.load_splat 32
; CHECK-NEXT: # fallthrough-return
%s = inttoptr i32 32 to float*
%e = load float, float* %s
%v1 = insertelement <4 x float> undef, float %e, i32 0
%v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
ret <4 x float> %v2
}
@gv_v4f32 = global <4 x float> <float 42., float 42., float 42., float 42.>
define <4 x float> @load_v4f32_from_global_address() {
; CHECK-LABEL: load_v4f32_from_global_address:
; CHECK: .functype load_v4f32_from_global_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: v128.load gv_v4f32
; CHECK-NEXT: # fallthrough-return
%v = load <4 x float>, <4 x float>* @gv_v4f32
ret <4 x float> %v
}
@gv_f32 = global float 42.
define <4 x float> @load_splat_v4f32_from_global_address() {
; CHECK-LABEL: load_splat_v4f32_from_global_address:
; CHECK: .functype load_splat_v4f32_from_global_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: v32x4.load_splat gv_f32
; CHECK-NEXT: # fallthrough-return
%e = load float, float* @gv_f32
%v1 = insertelement <4 x float> undef, float %e, i32 0
%v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
ret <4 x float> %v2
}
define void @store_v4f32(<4 x float> %v, <4 x float>* %p) {
; CHECK-LABEL: store_v4f32:
; CHECK: .functype store_v4f32 (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
store <4 x float> %v, <4 x float>* %p
ret void
}
define void @store_v4f32_with_folded_offset(<4 x float> %v, <4 x float>* %p) {
; CHECK-LABEL: store_v4f32_with_folded_offset:
; CHECK: .functype store_v4f32_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 16
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint <4 x float>* %p to i32
%r = add nuw i32 %q, 16
%s = inttoptr i32 %r to <4 x float>*
store <4 x float> %v, <4 x float>* %s
ret void
}
define void @store_v4f32_with_folded_gep_offset(<4 x float> %v, <4 x float>* %p) {
; CHECK-LABEL: store_v4f32_with_folded_gep_offset:
; CHECK: .functype store_v4f32_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 16
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 1
store <4 x float> %v, <4 x float>* %s
ret void
}
define void @store_v4f32_with_unfolded_gep_negative_offset(<4 x float> %v, <4 x float>* %p) {
; CHECK-LABEL: store_v4f32_with_unfolded_gep_negative_offset:
; CHECK: .functype store_v4f32_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const -16
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 -1
store <4 x float> %v, <4 x float>* %s
ret void
}
define void @store_v4f32_with_unfolded_offset(<4 x float> %v, <4 x float>* %p) {
; CHECK-LABEL: store_v4f32_with_unfolded_offset:
; CHECK: .functype store_v4f32_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint <4 x float>* %p to i32
%r = add nsw i32 %q, 16
%s = inttoptr i32 %r to <4 x float>*
store <4 x float> %v, <4 x float>* %s
ret void
}
define void @store_v4f32_with_unfolded_gep_offset(<4 x float> %v, <4 x float>* %p) {
; CHECK-LABEL: store_v4f32_with_unfolded_gep_offset:
; CHECK: .functype store_v4f32_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr <4 x float>, <4 x float>* %p, i32 1
store <4 x float> %v, <4 x float>* %s
ret void
}
define void @store_v4f32_to_numeric_address(<4 x float> %v) {
; CHECK-LABEL: store_v4f32_to_numeric_address:
; CHECK: .functype store_v4f32_to_numeric_address (v128) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 32
; CHECK-NEXT: # fallthrough-return
%s = inttoptr i32 32 to <4 x float>*
store <4 x float> %v, <4 x float>* %s
ret void
}
define void @store_v4f32_to_global_address(<4 x float> %v) {
; CHECK-LABEL: store_v4f32_to_global_address:
; CHECK: .functype store_v4f32_to_global_address (v128) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store gv_v4f32
; CHECK-NEXT: # fallthrough-return
store <4 x float> %v, <4 x float>* @gv_v4f32
ret void
}
; ==============================================================================
; 2 x double
; ==============================================================================
define <2 x double> @load_v2f64(<2 x double>* %p) {
; CHECK-LABEL: load_v2f64:
; CHECK: .functype load_v2f64 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
%v = load <2 x double>, <2 x double>* %p
ret <2 x double> %v
}
define <2 x double> @load_splat_v2f64(double* %p) {
; CHECK-LABEL: load_splat_v2f64:
; CHECK: .functype load_splat_v2f64 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v64x2.load_splat 0
; CHECK-NEXT: # fallthrough-return
%e = load double, double* %p
%v1 = insertelement <2 x double> undef, double %e, i32 0
%v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
ret <2 x double> %v2
}
define <2 x double> @load_v2f64_with_folded_offset(<2 x double>* %p) {
; CHECK-LABEL: load_v2f64_with_folded_offset:
; CHECK: .functype load_v2f64_with_folded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 16
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint <2 x double>* %p to i32
%r = add nuw i32 %q, 16
%s = inttoptr i32 %r to <2 x double>*
%v = load <2 x double>, <2 x double>* %s
ret <2 x double> %v
}
define <2 x double> @load_splat_v2f64_with_folded_offset(double* %p) {
; CHECK-LABEL: load_splat_v2f64_with_folded_offset:
; CHECK: .functype load_splat_v2f64_with_folded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v64x2.load_splat 16
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint double* %p to i32
%r = add nuw i32 %q, 16
%s = inttoptr i32 %r to double*
%e = load double, double* %s
%v1 = insertelement <2 x double> undef, double %e, i32 0
%v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
ret <2 x double> %v2
}
define <2 x double> @load_v2f64_with_folded_gep_offset(<2 x double>* %p) {
; CHECK-LABEL: load_v2f64_with_folded_gep_offset:
; CHECK: .functype load_v2f64_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 16
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 1
%v = load <2 x double>, <2 x double>* %s
ret <2 x double> %v
}
define <2 x double> @load_splat_v2f64_with_folded_gep_offset(double* %p) {
; CHECK-LABEL: load_splat_v2f64_with_folded_gep_offset:
; CHECK: .functype load_splat_v2f64_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v64x2.load_splat 8
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds double, double* %p, i32 1
%e = load double, double* %s
%v1 = insertelement <2 x double> undef, double %e, i32 0
%v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
ret <2 x double> %v2
}
define <2 x double> @load_v2f64_with_unfolded_gep_negative_offset(<2 x double>* %p) {
; CHECK-LABEL: load_v2f64_with_unfolded_gep_negative_offset:
; CHECK: .functype load_v2f64_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const -16
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 -1
%v = load <2 x double>, <2 x double>* %s
ret <2 x double> %v
}
define <2 x double> @load_splat_v2f64_with_unfolded_gep_negative_offset(double* %p) {
; CHECK-LABEL: load_splat_v2f64_with_unfolded_gep_negative_offset:
; CHECK: .functype load_splat_v2f64_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const -8
; CHECK-NEXT: i32.add
; CHECK-NEXT: v64x2.load_splat 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds double, double* %p, i32 -1
%e = load double, double* %s
%v1 = insertelement <2 x double> undef, double %e, i32 0
%v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
ret <2 x double> %v2
}
define <2 x double> @load_v2f64_with_unfolded_offset(<2 x double>* %p) {
; CHECK-LABEL: load_v2f64_with_unfolded_offset:
; CHECK: .functype load_v2f64_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint <2 x double>* %p to i32
%r = add nsw i32 %q, 16
%s = inttoptr i32 %r to <2 x double>*
%v = load <2 x double>, <2 x double>* %s
ret <2 x double> %v
}
define <2 x double> @load_splat_v2f64_with_unfolded_offset(double* %p) {
; CHECK-LABEL: load_splat_v2f64_with_unfolded_offset:
; CHECK: .functype load_splat_v2f64_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: v64x2.load_splat 0
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint double* %p to i32
%r = add nsw i32 %q, 16
%s = inttoptr i32 %r to double*
%e = load double, double* %s
%v1 = insertelement <2 x double> undef, double %e, i32 0
%v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
ret <2 x double> %v2
}
define <2 x double> @load_v2f64_with_unfolded_gep_offset(<2 x double>* %p) {
; CHECK-LABEL: load_v2f64_with_unfolded_gep_offset:
; CHECK: .functype load_v2f64_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr <2 x double>, <2 x double>* %p, i32 1
%v = load <2 x double>, <2 x double>* %s
ret <2 x double> %v
}
define <2 x double> @load_splat_v2f64_with_unfolded_gep_offset(double* %p) {
; CHECK-LABEL: load_splat_v2f64_with_unfolded_gep_offset:
; CHECK: .functype load_splat_v2f64_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 8
; CHECK-NEXT: i32.add
; CHECK-NEXT: v64x2.load_splat 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr double, double* %p, i32 1
%e = load double, double* %s
%v1 = insertelement <2 x double> undef, double %e, i32 0
%v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
ret <2 x double> %v2
}
define <2 x double> @load_v2f64_from_numeric_address() {
; CHECK-LABEL: load_v2f64_from_numeric_address:
; CHECK: .functype load_v2f64_from_numeric_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: v128.load 32
; CHECK-NEXT: # fallthrough-return
%s = inttoptr i32 32 to <2 x double>*
%v = load <2 x double>, <2 x double>* %s
ret <2 x double> %v
}
define <2 x double> @load_splat_v2f64_from_numeric_address() {
; CHECK-LABEL: load_splat_v2f64_from_numeric_address:
; CHECK: .functype load_splat_v2f64_from_numeric_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: v64x2.load_splat 32
; CHECK-NEXT: # fallthrough-return
%s = inttoptr i32 32 to double*
%e = load double, double* %s
%v1 = insertelement <2 x double> undef, double %e, i32 0
%v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
ret <2 x double> %v2
}
@gv_v2f64 = global <2 x double> <double 42., double 42.>
define <2 x double> @load_v2f64_from_global_address() {
; CHECK-LABEL: load_v2f64_from_global_address:
; CHECK: .functype load_v2f64_from_global_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: v128.load gv_v2f64
; CHECK-NEXT: # fallthrough-return
%v = load <2 x double>, <2 x double>* @gv_v2f64
ret <2 x double> %v
}
@gv_f64 = global double 42.
define <2 x double> @load_splat_v2f64_from_global_address() {
; CHECK-LABEL: load_splat_v2f64_from_global_address:
; CHECK: .functype load_splat_v2f64_from_global_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: v64x2.load_splat gv_f64
; CHECK-NEXT: # fallthrough-return
%e = load double, double* @gv_f64
%v1 = insertelement <2 x double> undef, double %e, i32 0
%v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
ret <2 x double> %v2
}
define void @store_v2f64(<2 x double> %v, <2 x double>* %p) {
; CHECK-LABEL: store_v2f64:
; CHECK: .functype store_v2f64 (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
store <2 x double> %v, <2 x double>* %p
ret void
}
define void @store_v2f64_with_folded_offset(<2 x double> %v, <2 x double>* %p) {
; CHECK-LABEL: store_v2f64_with_folded_offset:
; CHECK: .functype store_v2f64_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 16
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint <2 x double>* %p to i32
%r = add nuw i32 %q, 16
%s = inttoptr i32 %r to <2 x double>*
store <2 x double> %v, <2 x double>* %s
ret void
}
define void @store_v2f64_with_folded_gep_offset(<2 x double> %v, <2 x double>* %p) {
; CHECK-LABEL: store_v2f64_with_folded_gep_offset:
; CHECK: .functype store_v2f64_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 16
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 1
store <2 x double> %v, <2 x double>* %s
ret void
}
define void @store_v2f64_with_unfolded_gep_negative_offset(<2 x double> %v, <2 x double>* %p) {
; CHECK-LABEL: store_v2f64_with_unfolded_gep_negative_offset:
; CHECK: .functype store_v2f64_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const -16
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 -1
store <2 x double> %v, <2 x double>* %s
ret void
}
define void @store_v2f64_with_unfolded_offset(<2 x double> %v, <2 x double>* %p) {
; CHECK-LABEL: store_v2f64_with_unfolded_offset:
; CHECK: .functype store_v2f64_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint <2 x double>* %p to i32
%r = add nsw i32 %q, 16
%s = inttoptr i32 %r to <2 x double>*
store <2 x double> %v, <2 x double>* %s
ret void
}
define void @store_v2f64_with_unfolded_gep_offset(<2 x double> %v, <2 x double>* %p) {
; CHECK-LABEL: store_v2f64_with_unfolded_gep_offset:
; CHECK: .functype store_v2f64_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr <2 x double>, <2 x double>* %p, i32 1
store <2 x double> %v, <2 x double>* %s
ret void
}
define void @store_v2f64_to_numeric_address(<2 x double> %v) {
; CHECK-LABEL: store_v2f64_to_numeric_address:
; CHECK: .functype store_v2f64_to_numeric_address (v128) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 32
; CHECK-NEXT: # fallthrough-return
%s = inttoptr i32 32 to <2 x double>*
store <2 x double> %v, <2 x double>* %s
ret void
}
define void @store_v2f64_to_global_address(<2 x double> %v) {
; CHECK-LABEL: store_v2f64_to_global_address:
; CHECK: .functype store_v2f64_to_global_address (v128) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store gv_v2f64
; CHECK-NEXT: # fallthrough-return
store <2 x double> %v, <2 x double>* @gv_v2f64
ret void
}