mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-30 23:42:52 +01:00
52d36d0ccd
pmuludq is slow, but it turns out that all the unpacking and packing of the scalarized mul is even slower. 10% speedup on loop-vectorized paq8p. llvm-svn: 170985
15 lines
307 B
LLVM
15 lines
307 B
LLVM
; RUN: llc < %s -march=x86-64 -mcpu=core2 | FileCheck %s

; test1: a plain <4 x i32> multiply, compiled for core2 by the RUN line of
; this file. The directives after the `ret` pin the expected lowering: two
; pmuludq multiplies plus shuffles, i.e. the vector multiply is kept in
; vector registers instead of being scalarized.
;
; Immediate decoding for the shuffles being matched (two bits per lane,
; lowest lane first):
;   pshufd $49   = 0x31 -> source lanes [1,0,3,0]
;   shufps $-120 = 0x88 -> source lanes [0,2] of each operand
;   pshufd $-40  = 0xD8 -> source lanes [0,2,1,3]
;
; NOTE(review): pmuludq multiplies only the even 32-bit lanes of its
; operands, so presumably each `pshufd $49` moves the odd lanes of one input
; into even positions for the second multiply, and the final shufps/pshufd
; pair gathers the low halves of the four products back into lane order.
; The operand registers are not matched here, so confirm against actual llc
; output before relying on this description.
define <4 x i32> @test1(<4 x i32> %x, <4 x i32> %y) {
  %m = mul <4 x i32> %x, %y
  ret <4 x i32> %m
; The directives below are matched in order against the llc output.
; CHECK: test1:
; CHECK: pshufd $49
; CHECK: pmuludq
; CHECK: pshufd $49
; CHECK: pmuludq
; CHECK: shufps $-120
; CHECK: pshufd $-40
; CHECK: ret
}
|