[Utils][x86] add an option to reduce scrubbing of shuffles with memops

I was drafting a patch that would increase broadcast load usage,
but our shuffle scrubbing makes it impossible to see if the memory
operand offset was getting created correctly. I'm proposing to make
that an option (defaulted to 'off' for now to reduce regression
test churn).

The updated files provide examples of tests where we can now verify
that the pointer offset for a loaded memory operand is correct. We
still have stack and constant scrubbing that can obscure the operand
even if we don't scrub the entire instruction.

Differential Revision: https://reviews.llvm.org/D74775
Sanjay Patel 2020-02-20 09:22:56 -05:00
parent 8b6d8bc210
commit 6cb1ff7fe4
4 changed files with 36 additions and 22 deletions
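
For illustration only (not part of the commit): a minimal standalone Python sketch of what the new option changes. The two regexes are copied from the UpdateTestChecks/asm.py hunk below; the sample asm line comes from the test diffs.

import re

SCRUB_X86_SHUFFLES_RE = re.compile(
    r'^(\s*\w+) [^#\n]+#+ ((?:[xyz]mm\d+|mem)( \{%k\d+\}( \{z\})?)? = .*)$',
    flags=re.M)
SCRUB_X86_SHUFFLES_NO_MEM_RE = re.compile(
    r'^(\s*\w+) [^#\n]+#+ ((?:[xyz]mm\d+|mem)( \{%k\d+\}( \{z\})?)? = (?!.*(?:mem)).*)$',
    flags=re.M)

line = '  vmovddup 8(%rdi), %xmm0 # xmm0 = mem[0,0]'

# Default scrubbing collapses the operands into the shuffle comment,
# hiding the 8-byte pointer offset that this patch wants to verify:
print(SCRUB_X86_SHUFFLES_RE.sub(r'\1 {{.*#+}} \2', line))
#   vmovddup {{.*#+}} xmm0 = mem[0,0]

# With --no_x86_scrub_mem_shuffle, shuffles loading from memory are kept intact:
print(SCRUB_X86_SHUFFLES_NO_MEM_RE.sub(r'\1 {{.*#+}} \2', line))
#   vmovddup 8(%rdi), %xmm0 # xmm0 = mem[0,0]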


@@ -169,12 +169,12 @@ define <2 x double> @splat_load_2f64_11(<2 x double>* %ptr) {
 ; X86-LABEL: splat_load_2f64_11:
 ; X86: # %bb.0:
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
+; X86-NEXT: vmovddup 8(%eax), %xmm0 # xmm0 = mem[0,0]
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: splat_load_2f64_11:
 ; X64: # %bb.0:
-; X64-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
+; X64-NEXT: vmovddup 8(%rdi), %xmm0 # xmm0 = mem[0,0]
 ; X64-NEXT: retq
 %x = load <2 x double>, <2 x double>* %ptr
 %x1 = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> <i32 1, i32 1>


@@ -9,13 +9,13 @@ define i32 @t(<2 x i64>* %val) nounwind {
 ; X32-SSE2-LABEL: t:
 ; X32-SSE2: # %bb.0:
 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-SSE2-NEXT: pshufd {{.*#+}} xmm0 = mem[2,3,0,1]
+; X32-SSE2-NEXT: pshufd $78, (%eax), %xmm0 # xmm0 = mem[2,3,0,1]
 ; X32-SSE2-NEXT: movd %xmm0, %eax
 ; X32-SSE2-NEXT: retl
 ;
 ; X64-SSSE3-LABEL: t:
 ; X64-SSSE3: # %bb.0:
-; X64-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = mem[2,3,0,1]
+; X64-SSSE3-NEXT: pshufd $78, (%rdi), %xmm0 # xmm0 = mem[2,3,0,1]
 ; X64-SSSE3-NEXT: movd %xmm0, %eax
 ; X64-SSSE3-NEXT: retq
 ;
@@ -59,13 +59,13 @@ define void @t3(<2 x double>* %a0) {
 ;
 ; X64-SSSE3-LABEL: t3:
 ; X64-SSSE3: # %bb.0: # %bb
-; X64-SSSE3-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; X64-SSSE3-NEXT: movsd 8(%rdi), %xmm0 # xmm0 = mem[0],zero
 ; X64-SSSE3-NEXT: movsd %xmm0, (%rax)
 ; X64-SSSE3-NEXT: retq
 ;
 ; X64-AVX-LABEL: t3:
 ; X64-AVX: # %bb.0: # %bb
-; X64-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X64-AVX-NEXT: vmovsd 8(%rdi), %xmm0 # xmm0 = mem[0],zero
 ; X64-AVX-NEXT: vmovsd %xmm0, (%rax)
 ; X64-AVX-NEXT: retq
 bb:
@@ -138,7 +138,7 @@ define float @t6(<8 x float> *%a0) {
 ; X32-SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
 ; X32-SSE2-NEXT: xorps %xmm1, %xmm1
 ; X32-SSE2-NEXT: cmpeqss %xmm0, %xmm1
-; X32-SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X32-SSE2-NEXT: movss {{\.LCPI.*}}, %xmm2 # xmm2 = mem[0],zero,zero,zero
 ; X32-SSE2-NEXT: andps %xmm1, %xmm2
 ; X32-SSE2-NEXT: andnps %xmm0, %xmm1
 ; X32-SSE2-NEXT: orps %xmm2, %xmm1
@@ -150,10 +150,10 @@ define float @t6(<8 x float> *%a0) {
 ;
 ; X64-SSSE3-LABEL: t6:
 ; X64-SSSE3: # %bb.0:
-; X64-SSSE3-NEXT: movshdup {{.*#+}} xmm1 = mem[1,1,3,3]
+; X64-SSSE3-NEXT: movshdup (%rdi), %xmm1 # xmm1 = mem[1,1,3,3]
 ; X64-SSSE3-NEXT: xorps %xmm0, %xmm0
 ; X64-SSSE3-NEXT: cmpeqss %xmm1, %xmm0
-; X64-SSSE3-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X64-SSSE3-NEXT: movss {{.*}}(%rip), %xmm2 # xmm2 = mem[0],zero,zero,zero
 ; X64-SSSE3-NEXT: andps %xmm0, %xmm2
 ; X64-SSSE3-NEXT: andnps %xmm1, %xmm0
 ; X64-SSSE3-NEXT: orps %xmm2, %xmm0
@@ -161,10 +161,10 @@ define float @t6(<8 x float> *%a0) {
 ;
 ; X64-AVX-LABEL: t6:
 ; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vmovshdup {{.*#+}} xmm0 = mem[1,1,3,3]
+; X64-AVX-NEXT: vmovshdup (%rdi), %xmm0 # xmm0 = mem[1,1,3,3]
 ; X64-AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
 ; X64-AVX-NEXT: vcmpeqss %xmm1, %xmm0, %xmm1
-; X64-AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X64-AVX-NEXT: vmovss {{.*}}(%rip), %xmm2 # xmm2 = mem[0],zero,zero,zero
 ; X64-AVX-NEXT: vblendvps %xmm1, %xmm2, %xmm0, %xmm0
 ; X64-AVX-NEXT: retq
 %vecload = load <8 x float>, <8 x float>* %a0, align 32
@@ -183,7 +183,7 @@ define void @PR43971(<8 x float> *%a0, float *%a1) {
 ; X32-SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
 ; X32-SSE2-NEXT: xorps %xmm1, %xmm1
 ; X32-SSE2-NEXT: cmpltss %xmm0, %xmm1
-; X32-SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X32-SSE2-NEXT: movss (%eax), %xmm2 # xmm2 = mem[0],zero,zero,zero
 ; X32-SSE2-NEXT: andps %xmm1, %xmm2
 ; X32-SSE2-NEXT: andnps %xmm0, %xmm1
 ; X32-SSE2-NEXT: orps %xmm2, %xmm1
@@ -192,10 +192,10 @@ define void @PR43971(<8 x float> *%a0, float *%a1) {
 ;
 ; X64-SSSE3-LABEL: PR43971:
 ; X64-SSSE3: # %bb.0: # %entry
-; X64-SSSE3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-SSSE3-NEXT: movss 24(%rdi), %xmm0 # xmm0 = mem[0],zero,zero,zero
 ; X64-SSSE3-NEXT: xorps %xmm1, %xmm1
 ; X64-SSSE3-NEXT: cmpltss %xmm0, %xmm1
-; X64-SSSE3-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X64-SSSE3-NEXT: movss (%rsi), %xmm2 # xmm2 = mem[0],zero,zero,zero
 ; X64-SSSE3-NEXT: andps %xmm1, %xmm2
 ; X64-SSSE3-NEXT: andnps %xmm0, %xmm1
 ; X64-SSSE3-NEXT: orps %xmm2, %xmm1
@@ -204,10 +204,10 @@ define void @PR43971(<8 x float> *%a0, float *%a1) {
 ;
 ; X64-AVX-LABEL: PR43971:
 ; X64-AVX: # %bb.0: # %entry
-; X64-AVX-NEXT: vpermilpd {{.*#+}} xmm0 = mem[1,0]
+; X64-AVX-NEXT: vpermilpd $1, 16(%rdi), %xmm0 # xmm0 = mem[1,0]
 ; X64-AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
 ; X64-AVX-NEXT: vcmpltss %xmm0, %xmm1, %xmm1
-; X64-AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X64-AVX-NEXT: vmovss (%rsi), %xmm2 # xmm2 = mem[0],zero,zero,zero
 ; X64-AVX-NEXT: vblendvps %xmm1, %xmm2, %xmm0, %xmm0
 ; X64-AVX-NEXT: vmovss %xmm0, (%rsi)
 ; X64-AVX-NEXT: retq
@@ -230,7 +230,7 @@ define float @PR43971_1(<8 x float> *%a0) nounwind {
 ; X32-SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
 ; X32-SSE2-NEXT: xorps %xmm1, %xmm1
 ; X32-SSE2-NEXT: cmpeqss %xmm0, %xmm1
-; X32-SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X32-SSE2-NEXT: movss {{\.LCPI.*}}, %xmm2 # xmm2 = mem[0],zero,zero,zero
 ; X32-SSE2-NEXT: andps %xmm1, %xmm2
 ; X32-SSE2-NEXT: andnps %xmm0, %xmm1
 ; X32-SSE2-NEXT: orps %xmm2, %xmm1
@@ -241,10 +241,10 @@ define float @PR43971_1(<8 x float> *%a0) nounwind {
 ;
 ; X64-SSSE3-LABEL: PR43971_1:
 ; X64-SSSE3: # %bb.0: # %entry
-; X64-SSSE3-NEXT: movshdup {{.*#+}} xmm1 = mem[1,1,3,3]
+; X64-SSSE3-NEXT: movshdup (%rdi), %xmm1 # xmm1 = mem[1,1,3,3]
 ; X64-SSSE3-NEXT: xorps %xmm0, %xmm0
 ; X64-SSSE3-NEXT: cmpeqss %xmm1, %xmm0
-; X64-SSSE3-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X64-SSSE3-NEXT: movss {{.*}}(%rip), %xmm2 # xmm2 = mem[0],zero,zero,zero
 ; X64-SSSE3-NEXT: andps %xmm0, %xmm2
 ; X64-SSSE3-NEXT: andnps %xmm1, %xmm0
 ; X64-SSSE3-NEXT: orps %xmm2, %xmm0
@@ -252,10 +252,10 @@ define float @PR43971_1(<8 x float> *%a0) nounwind {
 ;
 ; X64-AVX-LABEL: PR43971_1:
 ; X64-AVX: # %bb.0: # %entry
-; X64-AVX-NEXT: vmovshdup {{.*#+}} xmm0 = mem[1,1,3,3]
+; X64-AVX-NEXT: vmovshdup (%rdi), %xmm0 # xmm0 = mem[1,1,3,3]
 ; X64-AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
 ; X64-AVX-NEXT: vcmpeqss %xmm1, %xmm0, %xmm1
-; X64-AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X64-AVX-NEXT: vmovss {{.*}}(%rip), %xmm2 # xmm2 = mem[0],zero,zero,zero
 ; X64-AVX-NEXT: vblendvps %xmm1, %xmm2, %xmm0, %xmm0
 ; X64-AVX-NEXT: retq
 entry:


@@ -148,6 +148,12 @@ SCRUB_X86_SHUFFLES_RE = (
     re.compile(
         r'^(\s*\w+) [^#\n]+#+ ((?:[xyz]mm\d+|mem)( \{%k\d+\}( \{z\})?)? = .*)$',
         flags=re.M))
+SCRUB_X86_SHUFFLES_NO_MEM_RE = (
+    re.compile(
+        r'^(\s*\w+) [^#\n]+#+ ((?:[xyz]mm\d+|mem)( \{%k\d+\}( \{z\})?)? = (?!.*(?:mem)).*)$',
+        flags=re.M))
 SCRUB_X86_SPILL_RELOAD_RE = (
     re.compile(
         r'-?\d+\(%([er])[sb]p\)(.*(?:Spill|Reload))$',
@@ -163,8 +169,13 @@ def scrub_asm_x86(asm, args):
   asm = common.SCRUB_WHITESPACE_RE.sub(r' ', asm)
   # Expand the tabs used for indentation.
   asm = string.expandtabs(asm, 2)
   # Detect shuffle asm comments and hide the operands in favor of the comments.
-  asm = SCRUB_X86_SHUFFLES_RE.sub(r'\1 {{.*#+}} \2', asm)
+  if getattr(args, 'no_x86_scrub_mem_shuffle', True):
+    asm = SCRUB_X86_SHUFFLES_NO_MEM_RE.sub(r'\1 {{.*#+}} \2', asm)
+  else:
+    asm = SCRUB_X86_SHUFFLES_RE.sub(r'\1 {{.*#+}} \2', asm)
   # Detect stack spills and reloads and hide their exact offset and whether
   # they used the stack pointer or frame pointer.
   asm = SCRUB_X86_SPILL_RELOAD_RE.sub(r'{{[-0-9]+}}(%\1{{[sb]}}p)\2', asm)
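
The only difference from SCRUB_X86_SHUFFLES_RE is the negative lookahead (?!.*(?:mem)): a shuffle whose asm comment mentions 'mem' no longer matches, so its operands survive, while register-only shuffles are scrubbed exactly as before. Note also the getattr() default of True: callers that share scrub_asm_x86() without defining the flag get the reduced-scrubbing branch. A small sketch of the lookahead behavior (the sample lines are illustrative, in the style of the test diffs above):

import re

SCRUB_X86_SHUFFLES_NO_MEM_RE = re.compile(
    r'^(\s*\w+) [^#\n]+#+ ((?:[xyz]mm\d+|mem)( \{%k\d+\}( \{z\})?)? = (?!.*(?:mem)).*)$',
    flags=re.M)

asm = ('  vmovddup 8(%rdi), %xmm0 # xmm0 = mem[0,0]\n'
       '  shufps $17, %xmm0, %xmm0 # xmm0 = xmm0[1,0,1,0]')

# The memory shuffle is kept; the register shuffle is still scrubbed:
print(SCRUB_X86_SHUFFLES_NO_MEM_RE.sub(r'\1 {{.*#+}} \2', asm))
#   vmovddup 8(%rdi), %xmm0 # xmm0 = mem[0,0]
#   shufps {{.*#+}} xmm0 = xmm0[1,0,1,0]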


@@ -36,6 +36,9 @@ def main():
       help='Use more regex for x86 matching to reduce diffs between various subtargets')
   parser.add_argument(
       '--no_x86_scrub_rip', action='store_false', dest='x86_scrub_rip')
+  parser.add_argument(
+      '--no_x86_scrub_mem_shuffle', action='store_true', default=False,
+      help='Reduce scrubbing shuffles with memory operands')
   parser.add_argument('tests', nargs='+')
   args = common.parse_commandline_args(parser)
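
A quick standalone sanity check of the flag wiring above (plain argparse; 'foo.ll' is a placeholder path): the option defaults to off, so existing tests regenerate unchanged unless it is passed explicitly.

import argparse

parser = argparse.ArgumentParser()
parser.add_argument(
    '--no_x86_scrub_mem_shuffle', action='store_true', default=False,
    help='Reduce scrubbing shuffles with memory operands')
parser.add_argument('tests', nargs='+')

print(parser.parse_args(['foo.ll']).no_x86_scrub_mem_shuffle)
# False -> scrub_asm_x86() takes the full-scrubbing SCRUB_X86_SHUFFLES_RE branch

print(parser.parse_args(['--no_x86_scrub_mem_shuffle', 'foo.ll']).no_x86_scrub_mem_shuffle)
# True -> shuffles with memory operands are left unscrubbed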