1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-24 13:33:37 +02:00
llvm-mirror/test/CodeGen/X86/vector-idiv.ll

262 lines
6.1 KiB
LLVM
Raw Normal View History

; RUN: llc -march=x86-64 -mcpu=core2 -mattr=+sse4.1 < %s | FileCheck %s -check-prefix=SSE41
; RUN: llc -march=x86-64 -mcpu=core2 < %s | FileCheck %s -check-prefix=SSE
; RUN: llc -march=x86-64 -mcpu=core-avx2 < %s | FileCheck %s -check-prefix=AVX
define <4 x i32> @test1(<4 x i32> %a) {
%div = udiv <4 x i32> %a, <i32 7, i32 7, i32 7, i32 7>
ret <4 x i32> %div
; SSE41-LABEL: test1:
; SSE41: pmuludq
; SSE41: pshufd $49
; SSE41: pmuludq
; SSE41: shufps $-35
; SSE41: psubd
; SSE41: psrld $1
; SSE41: padd
; SSE41: psrld $2
; AVX-LABEL: test1:
; AVX: vpmuludq
; AVX: vpshufd $49
; AVX: vpmuludq
; AVX: vshufps $-35
; AVX: vpsubd
; AVX: vpsrld $1
; AVX: vpadd
; AVX: vpsrld $2
}
define <8 x i32> @test2(<8 x i32> %a) {
%div = udiv <8 x i32> %a, <i32 7, i32 7, i32 7, i32 7,i32 7, i32 7, i32 7, i32 7>
ret <8 x i32> %div
; AVX-LABEL: test2:
; AVX: vpbroadcastd
; AVX: vpalignr $4
; AVX: vpmuludq
; AVX: vpmuludq
; AVX: vpblendd $170
; AVX: vpsubd
; AVX: vpsrld $1
; AVX: vpadd
; AVX: vpsrld $2
}
define <8 x i16> @test3(<8 x i16> %a) {
%div = udiv <8 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
ret <8 x i16> %div
; SSE41-LABEL: test3:
; SSE41: pmulhuw
; SSE41: psubw
; SSE41: psrlw $1
; SSE41: paddw
; SSE41: psrlw $2
; AVX-LABEL: test3:
; AVX: vpmulhuw
; AVX: vpsubw
; AVX: vpsrlw $1
; AVX: vpaddw
; AVX: vpsrlw $2
}
define <16 x i16> @test4(<16 x i16> %a) {
%div = udiv <16 x i16> %a, <i16 7, i16 7, i16 7, i16 7,i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7,i16 7, i16 7, i16 7, i16 7>
ret <16 x i16> %div
; AVX-LABEL: test4:
; AVX: vpmulhuw
; AVX: vpsubw
; AVX: vpsrlw $1
; AVX: vpaddw
; AVX: vpsrlw $2
; AVX-NOT: vpmulhuw
}
define <8 x i16> @test5(<8 x i16> %a) {
%div = sdiv <8 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
ret <8 x i16> %div
; SSE41-LABEL: test5:
; SSE41: pmulhw
; SSE41: psrlw $15
; SSE41: psraw $1
; SSE41: paddw
; AVX-LABEL: test5:
; AVX: vpmulhw
; AVX: vpsrlw $15
; AVX: vpsraw $1
; AVX: vpaddw
}
define <16 x i16> @test6(<16 x i16> %a) {
%div = sdiv <16 x i16> %a, <i16 7, i16 7, i16 7, i16 7,i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7,i16 7, i16 7, i16 7, i16 7>
ret <16 x i16> %div
; AVX-LABEL: test6:
; AVX: vpmulhw
; AVX: vpsrlw $15
; AVX: vpsraw $1
; AVX: vpaddw
; AVX-NOT: vpmulhw
}
define <16 x i8> @test7(<16 x i8> %a) {
%div = sdiv <16 x i8> %a, <i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7>
ret <16 x i8> %div
; FIXME: scalarized
; SSE41-LABEL: test7:
; SSE41: pext
; AVX-LABEL: test7:
; AVX: pext
}
define <4 x i32> @test8(<4 x i32> %a) {
%div = sdiv <4 x i32> %a, <i32 7, i32 7, i32 7, i32 7>
ret <4 x i32> %div
; SSE41-LABEL: test8:
; SSE41: pmuldq
; SSE41: pshufd $49
2014-07-25 00:15:28 +02:00
; SSE41: pshufd $49
; SSE41: pmuldq
; SSE41: shufps $-35
; SSE41: pshufd $-40
; SSE41: padd
; SSE41: psrld $31
; SSE41: psrad $2
; SSE41: padd
; SSE-LABEL: test8:
; SSE: pmuludq
; SSE: pshufd $49
; SSE: pshufd $49
; SSE: pmuludq
; SSE: shufps $-35
; SSE: pshufd $-40
; SSE: psubd
; SSE: padd
; SSE: psrld $31
; SSE: psrad $2
; SSE: padd
; AVX-LABEL: test8:
; AVX: vpmuldq
; AVX: vpshufd $49
2014-07-25 00:15:28 +02:00
; AVX: vpshufd $49
; AVX: vpmuldq
; AVX: vshufps $-35
; AVX: vpshufd $-40
; AVX: vpadd
; AVX: vpsrld $31
; AVX: vpsrad $2
; AVX: vpadd
}
define <8 x i32> @test9(<8 x i32> %a) {
%div = sdiv <8 x i32> %a, <i32 7, i32 7, i32 7, i32 7,i32 7, i32 7, i32 7, i32 7>
ret <8 x i32> %div
; AVX-LABEL: test9:
; AVX: vpbroadcastd
2014-07-25 00:15:28 +02:00
; AVX: vpalignr $4
; AVX: vpalignr $4
; AVX: vpmuldq
; AVX: vpmuldq
2014-07-25 00:15:28 +02:00
; AVX: vpalignr $4
; AVX: vpblendd $170
; AVX: vpadd
; AVX: vpsrld $31
; AVX: vpsrad $2
; AVX: vpadd
}
define <8 x i32> @test10(<8 x i32> %a) {
%rem = urem <8 x i32> %a, <i32 7, i32 7, i32 7, i32 7,i32 7, i32 7, i32 7, i32 7>
ret <8 x i32> %rem
; AVX-LABEL: test10:
; AVX: vpbroadcastd
; AVX: vpalignr $4
; AVX: vpmuludq
; AVX: vpmuludq
; AVX: vpblendd $170
; AVX: vpsubd
; AVX: vpsrld $1
; AVX: vpadd
; AVX: vpsrld $2
; AVX: vpmulld
}
define <8 x i32> @test11(<8 x i32> %a) {
%rem = srem <8 x i32> %a, <i32 7, i32 7, i32 7, i32 7,i32 7, i32 7, i32 7, i32 7>
ret <8 x i32> %rem
; AVX-LABEL: test11:
; AVX: vpbroadcastd
2014-07-25 00:15:28 +02:00
; AVX: vpalignr $4
; AVX: vpalignr $4
; AVX: vpmuldq
; AVX: vpmuldq
2014-07-25 00:15:28 +02:00
; AVX: vpalignr $4
; AVX: vpblendd $170
; AVX: vpadd
; AVX: vpsrld $31
; AVX: vpsrad $2
; AVX: vpadd
; AVX: vpmulld
}
define <2 x i16> @test12() {
%I8 = insertelement <2 x i16> zeroinitializer, i16 -1, i32 0
%I9 = insertelement <2 x i16> %I8, i16 -1, i32 1
%B9 = urem <2 x i16> %I9, %I9
ret <2 x i16> %B9
; AVX-LABEL: test12:
; AVX: xorps
}
[x86] Fix PR20355 (for real). There are many layers to this bug. The tale starts with r212808 which attempted to fix inversion of the low and high bits when lowering MUL_LOHI. Sadly, that commit did not include any positive test cases, and just removed some operations from a test case where the actual logic being changed isn't fully visible from the test. What this commit did was two things. First, it reversed the low and high results in the formation of the MERGE_VALUES node for the multiple results. This is entirely correct. Second it changed the shuffles for extracting the low and high components from the i64 results of the multiplies to extract them assuming a big-endian-style encoding of the multiply results. This second change is wrong. There is no big-endian encoding in x86, the results of the multiplies are normal v2i64s: when cast to v4i32, the low i32s are at offsets 0 and 2, and the high i32s are at offsets 1 and 3. However, the first change wasn't enough to actually fix the bug, which is (I assume) why the second change was also made. There was another bug in the MERGE_VALUES formation: we weren't using a VTList, and so were getting a single result node! When grabbing the *second* result from the node, we got... well.. colud be anything. I think this *appeared* to invert things, but had to be causing other problems as well. Fortunately, I fixed the MERGE_VALUES issue in r213931, so we should have been fine, right? NOOOPE! Because the core bug was never addressed, the test in vector-idiv failed when I fixed the MERGE_VALUES node. Because there are essentially no docs for this node, I had to guess at how to fix it and tried swapping the operands, restoring the order of the original code before r212808. While this "fixed" the test case (in that we produced the write instructions) we were still extracting the wrong elements of the i64s, and thus PR20355 was still broken. This commit essentially reverts the big-endian-style extraction part of r212808 and goes back to the original masks which were correct. Now that the MERGE_VALUES node formation is also correct, everything works. I've also included a more detailed test from PR20355 to make sure this stays fixed. llvm-svn: 214011
2014-07-26 05:46:57 +02:00
define <4 x i32> @PR20355(<4 x i32> %a) {
; SSE-LABEL: PR20355:
; SSE: movdqa {{(.*LCPI|__xmm@55555556555555565555555655555556).*}}, %[[X1:xmm[0-9]+]]
[x86] Fix PR20355 (for real). There are many layers to this bug. The tale starts with r212808 which attempted to fix inversion of the low and high bits when lowering MUL_LOHI. Sadly, that commit did not include any positive test cases, and just removed some operations from a test case where the actual logic being changed isn't fully visible from the test. What this commit did was two things. First, it reversed the low and high results in the formation of the MERGE_VALUES node for the multiple results. This is entirely correct. Second it changed the shuffles for extracting the low and high components from the i64 results of the multiplies to extract them assuming a big-endian-style encoding of the multiply results. This second change is wrong. There is no big-endian encoding in x86, the results of the multiplies are normal v2i64s: when cast to v4i32, the low i32s are at offsets 0 and 2, and the high i32s are at offsets 1 and 3. However, the first change wasn't enough to actually fix the bug, which is (I assume) why the second change was also made. There was another bug in the MERGE_VALUES formation: we weren't using a VTList, and so were getting a single result node! When grabbing the *second* result from the node, we got... well.. colud be anything. I think this *appeared* to invert things, but had to be causing other problems as well. Fortunately, I fixed the MERGE_VALUES issue in r213931, so we should have been fine, right? NOOOPE! Because the core bug was never addressed, the test in vector-idiv failed when I fixed the MERGE_VALUES node. Because there are essentially no docs for this node, I had to guess at how to fix it and tried swapping the operands, restoring the order of the original code before r212808. While this "fixed" the test case (in that we produced the write instructions) we were still extracting the wrong elements of the i64s, and thus PR20355 was still broken. This commit essentially reverts the big-endian-style extraction part of r212808 and goes back to the original masks which were correct. Now that the MERGE_VALUES node formation is also correct, everything works. I've also included a more detailed test from PR20355 to make sure this stays fixed. llvm-svn: 214011
2014-07-26 05:46:57 +02:00
; SSE-NEXT: movdqa %[[X1]], %[[X2:xmm[0-9]+]]
; SSE-NEXT: psrad $31, %[[X2]]
; SSE-NEXT: pand %xmm0, %[[X2]]
; SSE-NEXT: movdqa %xmm0, %[[X3:xmm[0-9]+]]
; SSE-NEXT: psrad $31, %[[X3]]
; SSE-NEXT: pand %[[X1]], %[[X3]]
; SSE-NEXT: paddd %[[X2]], %[[X3]]
; SSE-NEXT: pshufd {{.*}} #{{#?}} [[X4:xmm[0-9]+]] = xmm0[1,0,3,0]
[x86] Fix PR20355 (for real). There are many layers to this bug. The tale starts with r212808 which attempted to fix inversion of the low and high bits when lowering MUL_LOHI. Sadly, that commit did not include any positive test cases, and just removed some operations from a test case where the actual logic being changed isn't fully visible from the test. What this commit did was two things. First, it reversed the low and high results in the formation of the MERGE_VALUES node for the multiple results. This is entirely correct. Second it changed the shuffles for extracting the low and high components from the i64 results of the multiplies to extract them assuming a big-endian-style encoding of the multiply results. This second change is wrong. There is no big-endian encoding in x86, the results of the multiplies are normal v2i64s: when cast to v4i32, the low i32s are at offsets 0 and 2, and the high i32s are at offsets 1 and 3. However, the first change wasn't enough to actually fix the bug, which is (I assume) why the second change was also made. There was another bug in the MERGE_VALUES formation: we weren't using a VTList, and so were getting a single result node! When grabbing the *second* result from the node, we got... well.. colud be anything. I think this *appeared* to invert things, but had to be causing other problems as well. Fortunately, I fixed the MERGE_VALUES issue in r213931, so we should have been fine, right? NOOOPE! Because the core bug was never addressed, the test in vector-idiv failed when I fixed the MERGE_VALUES node. Because there are essentially no docs for this node, I had to guess at how to fix it and tried swapping the operands, restoring the order of the original code before r212808. While this "fixed" the test case (in that we produced the write instructions) we were still extracting the wrong elements of the i64s, and thus PR20355 was still broken. This commit essentially reverts the big-endian-style extraction part of r212808 and goes back to the original masks which were correct. Now that the MERGE_VALUES node formation is also correct, everything works. I've also included a more detailed test from PR20355 to make sure this stays fixed. llvm-svn: 214011
2014-07-26 05:46:57 +02:00
; SSE-NEXT: pmuludq %[[X1]], %xmm0
; SSE-NEXT: pshufd {{.*}} #{{#?}} [[X1]] = [[X1]][1,0,3,0]
[x86] Fix PR20355 (for real). There are many layers to this bug. The tale starts with r212808 which attempted to fix inversion of the low and high bits when lowering MUL_LOHI. Sadly, that commit did not include any positive test cases, and just removed some operations from a test case where the actual logic being changed isn't fully visible from the test. What this commit did was two things. First, it reversed the low and high results in the formation of the MERGE_VALUES node for the multiple results. This is entirely correct. Second it changed the shuffles for extracting the low and high components from the i64 results of the multiplies to extract them assuming a big-endian-style encoding of the multiply results. This second change is wrong. There is no big-endian encoding in x86, the results of the multiplies are normal v2i64s: when cast to v4i32, the low i32s are at offsets 0 and 2, and the high i32s are at offsets 1 and 3. However, the first change wasn't enough to actually fix the bug, which is (I assume) why the second change was also made. There was another bug in the MERGE_VALUES formation: we weren't using a VTList, and so were getting a single result node! When grabbing the *second* result from the node, we got... well.. colud be anything. I think this *appeared* to invert things, but had to be causing other problems as well. Fortunately, I fixed the MERGE_VALUES issue in r213931, so we should have been fine, right? NOOOPE! Because the core bug was never addressed, the test in vector-idiv failed when I fixed the MERGE_VALUES node. Because there are essentially no docs for this node, I had to guess at how to fix it and tried swapping the operands, restoring the order of the original code before r212808. While this "fixed" the test case (in that we produced the write instructions) we were still extracting the wrong elements of the i64s, and thus PR20355 was still broken. This commit essentially reverts the big-endian-style extraction part of r212808 and goes back to the original masks which were correct. Now that the MERGE_VALUES node formation is also correct, everything works. I've also included a more detailed test from PR20355 to make sure this stays fixed. llvm-svn: 214011
2014-07-26 05:46:57 +02:00
; SSE-NEXT: pmuludq %[[X4]], %[[X1]]
; SSE-NEXT: shufps {{.*}} #{{#?}} xmm0 = xmm0[1,3],[[X1]][1,3]
; SSE-NEXT: pshufd {{.*}} #{{#?}} [[X5:xmm[0-9]+]] = xmm0[0,2,1,3]
[x86] Fix PR20355 (for real). There are many layers to this bug. The tale starts with r212808 which attempted to fix inversion of the low and high bits when lowering MUL_LOHI. Sadly, that commit did not include any positive test cases, and just removed some operations from a test case where the actual logic being changed isn't fully visible from the test. What this commit did was two things. First, it reversed the low and high results in the formation of the MERGE_VALUES node for the multiple results. This is entirely correct. Second it changed the shuffles for extracting the low and high components from the i64 results of the multiplies to extract them assuming a big-endian-style encoding of the multiply results. This second change is wrong. There is no big-endian encoding in x86, the results of the multiplies are normal v2i64s: when cast to v4i32, the low i32s are at offsets 0 and 2, and the high i32s are at offsets 1 and 3. However, the first change wasn't enough to actually fix the bug, which is (I assume) why the second change was also made. There was another bug in the MERGE_VALUES formation: we weren't using a VTList, and so were getting a single result node! When grabbing the *second* result from the node, we got... well.. colud be anything. I think this *appeared* to invert things, but had to be causing other problems as well. Fortunately, I fixed the MERGE_VALUES issue in r213931, so we should have been fine, right? NOOOPE! Because the core bug was never addressed, the test in vector-idiv failed when I fixed the MERGE_VALUES node. Because there are essentially no docs for this node, I had to guess at how to fix it and tried swapping the operands, restoring the order of the original code before r212808. While this "fixed" the test case (in that we produced the write instructions) we were still extracting the wrong elements of the i64s, and thus PR20355 was still broken. This commit essentially reverts the big-endian-style extraction part of r212808 and goes back to the original masks which were correct. Now that the MERGE_VALUES node formation is also correct, everything works. I've also included a more detailed test from PR20355 to make sure this stays fixed. llvm-svn: 214011
2014-07-26 05:46:57 +02:00
; SSE-NEXT: psubd %[[X3]], %[[X5]]
; SSE-NEXT: movdqa %[[X5]], %xmm0
; SSE-NEXT: psrld $31, %xmm0
; SSE-NEXT: paddd %[[X5]], %xmm0
; SSE-NEXT: retq
;
; SSE41-LABEL: PR20355:
; SSE41: movdqa {{(.*LCPI|__xmm@55555556555555565555555655555556).*}}, %[[X1:xmm[0-9]+]]
; SSE41-NEXT: pshufd {{.*}} #{{#?}} [[X2:xmm[0-9]+]] = xmm0[1,0,3,0]
[x86] Fix PR20355 (for real). There are many layers to this bug. The tale starts with r212808 which attempted to fix inversion of the low and high bits when lowering MUL_LOHI. Sadly, that commit did not include any positive test cases, and just removed some operations from a test case where the actual logic being changed isn't fully visible from the test. What this commit did was two things. First, it reversed the low and high results in the formation of the MERGE_VALUES node for the multiple results. This is entirely correct. Second it changed the shuffles for extracting the low and high components from the i64 results of the multiplies to extract them assuming a big-endian-style encoding of the multiply results. This second change is wrong. There is no big-endian encoding in x86, the results of the multiplies are normal v2i64s: when cast to v4i32, the low i32s are at offsets 0 and 2, and the high i32s are at offsets 1 and 3. However, the first change wasn't enough to actually fix the bug, which is (I assume) why the second change was also made. There was another bug in the MERGE_VALUES formation: we weren't using a VTList, and so were getting a single result node! When grabbing the *second* result from the node, we got... well.. colud be anything. I think this *appeared* to invert things, but had to be causing other problems as well. Fortunately, I fixed the MERGE_VALUES issue in r213931, so we should have been fine, right? NOOOPE! Because the core bug was never addressed, the test in vector-idiv failed when I fixed the MERGE_VALUES node. Because there are essentially no docs for this node, I had to guess at how to fix it and tried swapping the operands, restoring the order of the original code before r212808. While this "fixed" the test case (in that we produced the write instructions) we were still extracting the wrong elements of the i64s, and thus PR20355 was still broken. This commit essentially reverts the big-endian-style extraction part of r212808 and goes back to the original masks which were correct. Now that the MERGE_VALUES node formation is also correct, everything works. I've also included a more detailed test from PR20355 to make sure this stays fixed. llvm-svn: 214011
2014-07-26 05:46:57 +02:00
; SSE41-NEXT: pmuldq %[[X1]], %xmm0
; SSE41-NEXT: pshufd {{.*}} #{{#?}} [[X1]] = [[X1]][1,0,3,0]
[x86] Fix PR20355 (for real). There are many layers to this bug. The tale starts with r212808 which attempted to fix inversion of the low and high bits when lowering MUL_LOHI. Sadly, that commit did not include any positive test cases, and just removed some operations from a test case where the actual logic being changed isn't fully visible from the test. What this commit did was two things. First, it reversed the low and high results in the formation of the MERGE_VALUES node for the multiple results. This is entirely correct. Second it changed the shuffles for extracting the low and high components from the i64 results of the multiplies to extract them assuming a big-endian-style encoding of the multiply results. This second change is wrong. There is no big-endian encoding in x86, the results of the multiplies are normal v2i64s: when cast to v4i32, the low i32s are at offsets 0 and 2, and the high i32s are at offsets 1 and 3. However, the first change wasn't enough to actually fix the bug, which is (I assume) why the second change was also made. There was another bug in the MERGE_VALUES formation: we weren't using a VTList, and so were getting a single result node! When grabbing the *second* result from the node, we got... well.. colud be anything. I think this *appeared* to invert things, but had to be causing other problems as well. Fortunately, I fixed the MERGE_VALUES issue in r213931, so we should have been fine, right? NOOOPE! Because the core bug was never addressed, the test in vector-idiv failed when I fixed the MERGE_VALUES node. Because there are essentially no docs for this node, I had to guess at how to fix it and tried swapping the operands, restoring the order of the original code before r212808. While this "fixed" the test case (in that we produced the write instructions) we were still extracting the wrong elements of the i64s, and thus PR20355 was still broken. This commit essentially reverts the big-endian-style extraction part of r212808 and goes back to the original masks which were correct. Now that the MERGE_VALUES node formation is also correct, everything works. I've also included a more detailed test from PR20355 to make sure this stays fixed. llvm-svn: 214011
2014-07-26 05:46:57 +02:00
; SSE41-NEXT: pmuldq %[[X2]], %[[X1]]
; SSE41-NEXT: shufps {{.*}} #{{#?}} xmm0 = xmm0[1,3],[[X1]][1,3]
; SSE41-NEXT: pshufd {{.*}} #{{#?}} [[X3:xmm[0-9]+]] = xmm0[0,2,1,3]
[x86] Fix PR20355 (for real). There are many layers to this bug. The tale starts with r212808 which attempted to fix inversion of the low and high bits when lowering MUL_LOHI. Sadly, that commit did not include any positive test cases, and just removed some operations from a test case where the actual logic being changed isn't fully visible from the test. What this commit did was two things. First, it reversed the low and high results in the formation of the MERGE_VALUES node for the multiple results. This is entirely correct. Second it changed the shuffles for extracting the low and high components from the i64 results of the multiplies to extract them assuming a big-endian-style encoding of the multiply results. This second change is wrong. There is no big-endian encoding in x86, the results of the multiplies are normal v2i64s: when cast to v4i32, the low i32s are at offsets 0 and 2, and the high i32s are at offsets 1 and 3. However, the first change wasn't enough to actually fix the bug, which is (I assume) why the second change was also made. There was another bug in the MERGE_VALUES formation: we weren't using a VTList, and so were getting a single result node! When grabbing the *second* result from the node, we got... well.. colud be anything. I think this *appeared* to invert things, but had to be causing other problems as well. Fortunately, I fixed the MERGE_VALUES issue in r213931, so we should have been fine, right? NOOOPE! Because the core bug was never addressed, the test in vector-idiv failed when I fixed the MERGE_VALUES node. Because there are essentially no docs for this node, I had to guess at how to fix it and tried swapping the operands, restoring the order of the original code before r212808. While this "fixed" the test case (in that we produced the write instructions) we were still extracting the wrong elements of the i64s, and thus PR20355 was still broken. This commit essentially reverts the big-endian-style extraction part of r212808 and goes back to the original masks which were correct. Now that the MERGE_VALUES node formation is also correct, everything works. I've also included a more detailed test from PR20355 to make sure this stays fixed. llvm-svn: 214011
2014-07-26 05:46:57 +02:00
; SSE41-NEXT: movdqa %[[X3]], %xmm0
; SSE41-NEXT: psrld $31, %xmm0
; SSE41-NEXT: paddd %[[X3]], %xmm0
; SSE41-NEXT: retq
entry:
%sdiv = sdiv <4 x i32> %a, <i32 3, i32 3, i32 3, i32 3>
ret <4 x i32> %sdiv
}