1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 11:13:28 +01:00

Replace more uses of sse41 with sse4.1.

llc using the host cpu features and *warning* on unknown features is probably
not a good thing :-(

llvm-svn: 189144
This commit is contained in:
Rafael Espindola 2013-08-23 20:39:19 +00:00
parent e8f25b8c77
commit b9807cdcf1
39 changed files with 46 additions and 46 deletions

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -mattr=+sse41
; RUN: llc < %s -mattr=+sse4.1
; rdar://5886601
; gcc testsuite: gcc.target/i386/sse4_1-pblendw.c
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"

View File

@ -1,6 +1,6 @@
; REQUIRES: asserts
; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 -mcpu=penryn -stats 2>&1 | grep "4 machine-licm"
; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 -mcpu=penryn | FileCheck %s
; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse4.1 -mcpu=penryn -stats 2>&1 | grep "4 machine-licm"
; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse4.1 -mcpu=penryn | FileCheck %s
; rdar://6627786
; rdar://7792037

View File

@ -1,5 +1,5 @@
; RUN: llc -mtriple=i386-apple-darwin10.0 -relocation-model=pic -asm-verbose=false \
; RUN: -mcpu=generic -disable-fp-elim -mattr=-sse41,-sse3,+sse2 -post-RA-scheduler=false -regalloc=basic < %s | \
; RUN: -mcpu=generic -disable-fp-elim -mattr=-sse4.1,-sse3,+sse2 -post-RA-scheduler=false -regalloc=basic < %s | \
; RUN: FileCheck %s
; rdar://6808032

View File

@ -1,4 +1,4 @@
;RUN: llc < %s -march=x86 -mcpu=yonah -mattr=+sse2,-sse41 | FileCheck %s
;RUN: llc < %s -march=x86 -mcpu=yonah -mattr=+sse2,-sse4.1 | FileCheck %s
;CHECK: @max
;CHECK: cmplepd

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -march=x86-64 -mcpu=corei7 -mattr=+sse41
; RUN: llc < %s -march=x86-64 -mcpu=corei7 -mattr=+sse4.1
; Make sure we are not crashing on this code.

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=-sse41
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=-sse4.1
target triple = "x86_64-unknown-linux-gnu"

View File

@ -1,5 +1,5 @@
; RUN: llc -march=x86-64 -mattr=+sse41 -mcpu=penryn < %s | FileCheck %s -check-prefix=CHECK-W-SSE4
; RUN: llc -march=x86-64 -mattr=-sse41 -mcpu=penryn < %s | FileCheck %s -check-prefix=CHECK-WO-SSE4
; RUN: llc -march=x86-64 -mattr=+sse4.1 -mcpu=penryn < %s | FileCheck %s -check-prefix=CHECK-W-SSE4
; RUN: llc -march=x86-64 -mattr=-sse4.1 -mcpu=penryn < %s | FileCheck %s -check-prefix=CHECK-WO-SSE4
; Test case for r146671
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.7"

View File

@ -1,4 +1,4 @@
; RUN: llc -march=x86-64 -mattr=-sse42,+sse41 < %s | FileCheck %s
; RUN: llc -march=x86-64 -mattr=-sse42,+sse4.1 < %s | FileCheck %s
; Make sure we don't load from the location pointed to by %p
; twice: it has non-obvious performance implications, and
; the relevant transformation doesn't know how to update

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 -mattr=+sse41 | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 -mattr=+sse4.1 | FileCheck %s
; In this test we check that sign-extend of the mask bit is performed by

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -mcpu=corei7 -mattr=-sse2,-sse41 -verify-machineinstrs
; RUN: llc < %s -mcpu=corei7 -mattr=-sse2,-sse4.1 -verify-machineinstrs
target triple = "x86_64-unknown-linux-gnu"
; PR10503

View File

@ -1,5 +1,5 @@
; RUN: llc < %s -march=x86-64 -mattr=+sse41,-avx -soft-float=0 | FileCheck %s --check-prefix=CHECK-HARD-FLOAT
; RUN: llc < %s -march=x86-64 -mattr=+sse41,-avx -soft-float=1 | FileCheck %s --check-prefix=CHECK-SOFT-FLOAT
; RUN: llc < %s -march=x86-64 -mattr=+sse4.1,-avx -soft-float=0 | FileCheck %s --check-prefix=CHECK-HARD-FLOAT
; RUN: llc < %s -march=x86-64 -mattr=+sse4.1,-avx -soft-float=1 | FileCheck %s --check-prefix=CHECK-SOFT-FLOAT
target triple = "x86_64-unknown-linux-gnu"

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -march=x86-64 -mcpu=corei7 -mattr=+sse41 | FileCheck %s
; RUN: llc < %s -march=x86-64 -mcpu=corei7 -mattr=+sse4.1 | FileCheck %s
; rdar://12721174
; We should not fold movss into pshufd since pshufd expects m128 while movss

View File

@ -1,6 +1,6 @@
; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -mcpu=penryn -mattr=sse41 | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-linux -mcpu=penryn -mattr=sse41 | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=penryn -mattr=sse41 | FileCheck %s --check-prefix=X64
; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -mcpu=penryn -mattr=sse4.1 | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-linux -mcpu=penryn -mattr=sse4.1 | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=penryn -mattr=sse4.1 | FileCheck %s --check-prefix=X64
define i32 @test1() nounwind readonly {
entry:

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -march=x86 -mattr=+sse41 -o %t
; RUN: llc < %s -march=x86 -mattr=+sse4.1 -o %t
; RUN: grep xorps %t | count 1
; Test that when we don't -enable-unsafe-fp-math, we don't do the optimization

View File

@ -1,4 +1,4 @@
; RUN: llc -mcpu=corei7 -mtriple=x86_64-linux < %s -mattr=+sse2,+sse41 | FileCheck %s
; RUN: llc -mcpu=corei7 -mtriple=x86_64-linux < %s -mattr=+sse2,+sse4.1 | FileCheck %s
; CHECK: func_4_8
; A single memory write

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -march=x86 -mattr=sse41 -mcpu=nehalem -stack-alignment=16 > %t
; RUN: llc < %s -march=x86 -mattr=sse4.1 -mcpu=nehalem -stack-alignment=16 > %t
; RUN: grep pmul %t | count 12
; RUN: grep mov %t | count 11

View File

@ -1,5 +1,5 @@
; RUN: llc < %s -mtriple=x86_64-linux -mattr=+sse41 -asm-verbose=0 | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+sse41 -asm-verbose=0 | FileCheck %s -check-prefix=WIN64
; RUN: llc < %s -mtriple=x86_64-linux -mattr=+sse4.1 -asm-verbose=0 | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+sse4.1 -asm-verbose=0 | FileCheck %s -check-prefix=WIN64
define <4 x i32> @test1(<4 x i32> %A, <4 x i32> %B) nounwind {
; CHECK-LABEL: test1:

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -march=x86-64 -mattr=+sse2,+sse41
; RUN: llc < %s -march=x86-64 -mattr=+sse2,+sse4.1
; No check in a crash test

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -march=x86-64 -mattr=+sse2,+sse41
; RUN: llc < %s -march=x86-64 -mattr=+sse2,+sse4.1
; No check in a crash test

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -march=x86-64 -mattr=+sse2,+sse41
; RUN: llc < %s -march=x86-64 -mattr=+sse2,+sse4.1
; No check in a crash test

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -march=x86-64 -mattr=+sse2,+sse41
; RUN: llc < %s -march=x86-64 -mattr=+sse2,+sse4.1
; No check in a crash test

View File

@ -1,4 +1,4 @@
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+sse41,-avx < %s | FileCheck %s --check-prefix SSE41
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+sse4.1,-avx < %s | FileCheck %s --check-prefix SSE41
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx,-avx2 < %s | FileCheck %s --check-prefix AVX
define i32 @veccond128(<4 x i32> %input) {

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -mtriple=x86_64-apple-macosx -mattr=+sse41 | FileCheck -check-prefix=CHECK-SSE %s
; RUN: llc < %s -mtriple=x86_64-apple-macosx -mattr=+sse4.1 | FileCheck -check-prefix=CHECK-SSE %s
; RUN: llc < %s -mtriple=x86_64-apple-macosx -mattr=+avx | FileCheck -check-prefix=CHECK-AVX %s
define float @test1(float %x) nounwind {

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -march=x86 -mcpu=yonah -mattr=+sse2,-sse41 | FileCheck %s
; RUN: llc < %s -march=x86 -mcpu=yonah -mattr=+sse2,-sse4.1 | FileCheck %s
; CHECK: vsel_float
; CHECK: pandn

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 -mattr=+sse41 | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 -mattr=+sse4.1 | FileCheck %s
;CHECK-LABEL: vsel_float:
;CHECK: blendvps

View File

@ -1,5 +1,5 @@
; RUN: llc < %s -mtriple=i686-apple-darwin9 -mattr=sse41 -mcpu=penryn | FileCheck %s -check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mattr=sse41 -mcpu=penryn | FileCheck %s -check-prefix=X64
; RUN: llc < %s -mtriple=i686-apple-darwin9 -mattr=sse4.1 -mcpu=penryn | FileCheck %s -check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mattr=sse4.1 -mcpu=penryn | FileCheck %s -check-prefix=X64
@g16 = external global i16

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -march=x86-64 -mcpu=corei7 -mattr=+sse41 | FileCheck %s
; RUN: llc < %s -march=x86-64 -mcpu=corei7 -mattr=+sse4.1 | FileCheck %s
;CHECK-LABEL: load_2_i8:
; A single 16-bit load

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -mcpu=corei7 -march=x86 -mattr=+sse41 -o %t
; RUN: llc < %s -mcpu=corei7 -march=x86 -mattr=+sse4.1 -o %t
; RUN: not grep extractps %t
; RUN: not grep pextrd %t
; RUN: not grep pshufd %t

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -march=x86 -mattr=+sse41,-avx | FileCheck %s
; RUN: llc < %s -march=x86 -mattr=+sse4.1,-avx | FileCheck %s
; RUN: llc < %s -march=x86 -mcpu=corei7-avx | FileCheck --check-prefix=AVX %s
; PR11674

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -march=x86 -mattr=+sse41 -o %t
; RUN: llc < %s -march=x86 -mattr=+sse4.1 -o %t
; tests variable insert and extract of a 4 x i32

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -march=x86 -mattr=+sse41 > %t
; RUN: llc < %s -march=x86 -mattr=+sse4.1 > %t
; RUN: grep pinsrd %t | count 1
define <4 x i32> @var_insert2(<4 x i32> %x, i32 %val, i32 %idx) nounwind {

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -march=x86 -mattr=+sse41 | FileCheck %s
; RUN: llc < %s -march=x86 -mattr=+sse4.1 | FileCheck %s
define <2 x i64> @shl1(<4 x i32> %r, <4 x i32> %a) nounwind readnone ssp {
entry:

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -march=x86 -mattr=sse41 -o %t
; RUN: llc < %s -march=x86 -mattr=sse4.1 -o %t
; RUN: grep unpcklps %t | count 3
; RUN: grep unpckhps %t | count 1

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -march=x86 -mcpu=penryn -mattr=sse41 | FileCheck %s
; RUN: llc < %s -march=x86 -mcpu=penryn -mattr=sse4.1 | FileCheck %s
; ModuleID = 'vec_shuffle-27.bc'
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -march=x86-64 -mcpu=penryn -mattr=sse41 | FileCheck %s
; RUN: llc < %s -march=x86-64 -mcpu=penryn -mattr=sse4.1 | FileCheck %s
define <8 x i16> @shuf6(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
; CHECK: pshufb

View File

@ -1,4 +1,4 @@
; RUN: llc <%s -march=x86 -mcpu=penryn -mattr=sse41 | FileCheck %s
; RUN: llc <%s -march=x86 -mcpu=penryn -mattr=sse4.1 | FileCheck %s
; Splat test for v8i16
define <8 x i16> @shuf_8i16_0(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -march=x86 -mattr=+sse,+sse2,+sse41 | FileCheck %s
; RUN: llc < %s -march=x86 -mattr=+sse,+sse2,+sse4.1 | FileCheck %s
target datalayout = "e-p:32:32"
target triple = "i686-apple-darwin8.7.2"

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -march=x86-64 -mattr=+sse41
; RUN: llc < %s -march=x86-64 -mattr=+sse4.1
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-darwin11.0.0"

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -march=x86 -mattr=+sse41 | FileCheck %s
; RUN: llc < %s -march=x86 -mattr=+sse4.1 | FileCheck %s
; CHECK: movd
; Test bit convert that requires widening in the operand.