[Utils][x86] add an option to reduce scrubbing of shuffles with memops
I was drafting a patch that would increase broadcast load usage, but our shuffle scrubbing makes it impossible to see if the memory operand offset was getting created correctly.

I'm proposing to make that an option (defaulted to 'off' for now to reduce regression test churn). The updated files provide examples of tests where we can now verify that the pointer offset for a loaded memory operand is correct. We still have stack and constant scrubbing that can obscure the operand even if we don't scrub the entire instruction.

Differential Revision: https://reviews.llvm.org/D74775
parent 8b6d8bc210
commit 6cb1ff7fe4
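For context before the diff, here is a minimal standalone sketch (not part of the commit) of the behavior change. It applies the two scrub patterns that this patch puts in UpdateTestChecks/asm.py, both copied verbatim from the hunks below, to a sample line taken from the updated tests:

import re

# Existing pattern: scrubs every shuffle comment, hiding the operands.
SCRUB_X86_SHUFFLES_RE = re.compile(
    r'^(\s*\w+) [^#\n]+#+ ((?:[xyz]mm\d+|mem)( \{%k\d+\}( \{z\})?)? = .*)$',
    flags=re.M)
# New pattern: the negative lookahead skips shuffles whose comment decodes 'mem'.
SCRUB_X86_SHUFFLES_NO_MEM_RE = re.compile(
    r'^(\s*\w+) [^#\n]+#+ ((?:[xyz]mm\d+|mem)( \{%k\d+\}( \{z\})?)? = (?!.*(?:mem)).*)$',
    flags=re.M)

line = ' vmovddup 8(%rdi), %xmm0 # xmm0 = mem[0,0]'
print(SCRUB_X86_SHUFFLES_RE.sub(r'\1 {{.*#+}} \2', line))
# -> ' vmovddup {{.*#+}} xmm0 = mem[0,0]'            (the 8(%rdi) offset is hidden)
print(SCRUB_X86_SHUFFLES_NO_MEM_RE.sub(r'\1 {{.*#+}} \2', line))
# -> ' vmovddup 8(%rdi), %xmm0 # xmm0 = mem[0,0]'    (offset preserved)

With the default pattern the pointer offset disappears into {{.*#+}}; with the new NO_MEM pattern the line is left alone because its shuffle comment decodes a memory operand.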
@@ -169,12 +169,12 @@ define <2 x double> @splat_load_2f64_11(<2 x double>* %ptr) {
 ; X86-LABEL: splat_load_2f64_11:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
+; X86-NEXT:    vmovddup 8(%eax), %xmm0 # xmm0 = mem[0,0]
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: splat_load_2f64_11:
 ; X64:       # %bb.0:
-; X64-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
+; X64-NEXT:    vmovddup 8(%rdi), %xmm0 # xmm0 = mem[0,0]
 ; X64-NEXT:    retq
   %x = load <2 x double>, <2 x double>* %ptr
   %x1 = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> <i32 1, i32 1>
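The value of the change is visible here: the shuffle broadcasts element 1 of a <2 x double>, and with 8-byte elements that element sits at byte offset 1 * 8 = 8, so the previously scrubbed 8(%eax)/8(%rdi) operands can now be checked directly against the IR.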
@@ -9,13 +9,13 @@ define i32 @t(<2 x i64>* %val) nounwind {
 ; X32-SSE2-LABEL: t:
 ; X32-SSE2:       # %bb.0:
 ; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = mem[2,3,0,1]
+; X32-SSE2-NEXT:    pshufd $78, (%eax), %xmm0 # xmm0 = mem[2,3,0,1]
 ; X32-SSE2-NEXT:    movd %xmm0, %eax
 ; X32-SSE2-NEXT:    retl
 ;
 ; X64-SSSE3-LABEL: t:
 ; X64-SSSE3:       # %bb.0:
-; X64-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = mem[2,3,0,1]
+; X64-SSSE3-NEXT:    pshufd $78, (%rdi), %xmm0 # xmm0 = mem[2,3,0,1]
 ; X64-SSSE3-NEXT:    movd %xmm0, %eax
 ; X64-SSSE3-NEXT:    retq
 ;
@@ -59,13 +59,13 @@ define void @t3(<2 x double>* %a0) {
 ;
 ; X64-SSSE3-LABEL: t3:
 ; X64-SSSE3:       # %bb.0: # %bb
-; X64-SSSE3-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; X64-SSSE3-NEXT:    movsd 8(%rdi), %xmm0 # xmm0 = mem[0],zero
 ; X64-SSSE3-NEXT:    movsd %xmm0, (%rax)
 ; X64-SSSE3-NEXT:    retq
 ;
 ; X64-AVX-LABEL: t3:
 ; X64-AVX:       # %bb.0: # %bb
-; X64-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X64-AVX-NEXT:    vmovsd 8(%rdi), %xmm0 # xmm0 = mem[0],zero
 ; X64-AVX-NEXT:    vmovsd %xmm0, (%rax)
 ; X64-AVX-NEXT:    retq
 bb:
@@ -138,7 +138,7 @@ define float @t6(<8 x float> *%a0) {
 ; X32-SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
 ; X32-SSE2-NEXT:    xorps %xmm1, %xmm1
 ; X32-SSE2-NEXT:    cmpeqss %xmm0, %xmm1
-; X32-SSE2-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X32-SSE2-NEXT:    movss {{\.LCPI.*}}, %xmm2 # xmm2 = mem[0],zero,zero,zero
 ; X32-SSE2-NEXT:    andps %xmm1, %xmm2
 ; X32-SSE2-NEXT:    andnps %xmm0, %xmm1
 ; X32-SSE2-NEXT:    orps %xmm2, %xmm1
@@ -150,10 +150,10 @@ define float @t6(<8 x float> *%a0) {
 ;
 ; X64-SSSE3-LABEL: t6:
 ; X64-SSSE3:       # %bb.0:
-; X64-SSSE3-NEXT:    movshdup {{.*#+}} xmm1 = mem[1,1,3,3]
+; X64-SSSE3-NEXT:    movshdup (%rdi), %xmm1 # xmm1 = mem[1,1,3,3]
 ; X64-SSSE3-NEXT:    xorps %xmm0, %xmm0
 ; X64-SSSE3-NEXT:    cmpeqss %xmm1, %xmm0
-; X64-SSSE3-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X64-SSSE3-NEXT:    movss {{.*}}(%rip), %xmm2 # xmm2 = mem[0],zero,zero,zero
 ; X64-SSSE3-NEXT:    andps %xmm0, %xmm2
 ; X64-SSSE3-NEXT:    andnps %xmm1, %xmm0
 ; X64-SSSE3-NEXT:    orps %xmm2, %xmm0
@@ -161,10 +161,10 @@ define float @t6(<8 x float> *%a0) {
 ;
 ; X64-AVX-LABEL: t6:
 ; X64-AVX:       # %bb.0:
-; X64-AVX-NEXT:    vmovshdup {{.*#+}} xmm0 = mem[1,1,3,3]
+; X64-AVX-NEXT:    vmovshdup (%rdi), %xmm0 # xmm0 = mem[1,1,3,3]
 ; X64-AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
 ; X64-AVX-NEXT:    vcmpeqss %xmm1, %xmm0, %xmm1
-; X64-AVX-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X64-AVX-NEXT:    vmovss {{.*}}(%rip), %xmm2 # xmm2 = mem[0],zero,zero,zero
 ; X64-AVX-NEXT:    vblendvps %xmm1, %xmm2, %xmm0, %xmm0
 ; X64-AVX-NEXT:    retq
   %vecload = load <8 x float>, <8 x float>* %a0, align 32
@@ -183,7 +183,7 @@ define void @PR43971(<8 x float> *%a0, float *%a1) {
 ; X32-SSE2-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
 ; X32-SSE2-NEXT:    xorps %xmm1, %xmm1
 ; X32-SSE2-NEXT:    cmpltss %xmm0, %xmm1
-; X32-SSE2-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X32-SSE2-NEXT:    movss (%eax), %xmm2 # xmm2 = mem[0],zero,zero,zero
 ; X32-SSE2-NEXT:    andps %xmm1, %xmm2
 ; X32-SSE2-NEXT:    andnps %xmm0, %xmm1
 ; X32-SSE2-NEXT:    orps %xmm2, %xmm1
@@ -192,10 +192,10 @@ define void @PR43971(<8 x float> *%a0, float *%a1) {
 ;
 ; X64-SSSE3-LABEL: PR43971:
 ; X64-SSSE3:       # %bb.0: # %entry
-; X64-SSSE3-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-SSSE3-NEXT:    movss 24(%rdi), %xmm0 # xmm0 = mem[0],zero,zero,zero
 ; X64-SSSE3-NEXT:    xorps %xmm1, %xmm1
 ; X64-SSSE3-NEXT:    cmpltss %xmm0, %xmm1
-; X64-SSSE3-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X64-SSSE3-NEXT:    movss (%rsi), %xmm2 # xmm2 = mem[0],zero,zero,zero
 ; X64-SSSE3-NEXT:    andps %xmm1, %xmm2
 ; X64-SSSE3-NEXT:    andnps %xmm0, %xmm1
 ; X64-SSSE3-NEXT:    orps %xmm2, %xmm1
@@ -204,10 +204,10 @@ define void @PR43971(<8 x float> *%a0, float *%a1) {
 ;
 ; X64-AVX-LABEL: PR43971:
 ; X64-AVX:       # %bb.0: # %entry
-; X64-AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = mem[1,0]
+; X64-AVX-NEXT:    vpermilpd $1, 16(%rdi), %xmm0 # xmm0 = mem[1,0]
 ; X64-AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
 ; X64-AVX-NEXT:    vcmpltss %xmm0, %xmm1, %xmm1
-; X64-AVX-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X64-AVX-NEXT:    vmovss (%rsi), %xmm2 # xmm2 = mem[0],zero,zero,zero
 ; X64-AVX-NEXT:    vblendvps %xmm1, %xmm2, %xmm0, %xmm0
 ; X64-AVX-NEXT:    vmovss %xmm0, (%rsi)
 ; X64-AVX-NEXT:    retq
@@ -230,7 +230,7 @@ define float @PR43971_1(<8 x float> *%a0) nounwind {
 ; X32-SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
 ; X32-SSE2-NEXT:    xorps %xmm1, %xmm1
 ; X32-SSE2-NEXT:    cmpeqss %xmm0, %xmm1
-; X32-SSE2-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X32-SSE2-NEXT:    movss {{\.LCPI.*}}, %xmm2 # xmm2 = mem[0],zero,zero,zero
 ; X32-SSE2-NEXT:    andps %xmm1, %xmm2
 ; X32-SSE2-NEXT:    andnps %xmm0, %xmm1
 ; X32-SSE2-NEXT:    orps %xmm2, %xmm1
@@ -241,10 +241,10 @@ define float @PR43971_1(<8 x float> *%a0) nounwind {
 ;
 ; X64-SSSE3-LABEL: PR43971_1:
 ; X64-SSSE3:       # %bb.0: # %entry
-; X64-SSSE3-NEXT:    movshdup {{.*#+}} xmm1 = mem[1,1,3,3]
+; X64-SSSE3-NEXT:    movshdup (%rdi), %xmm1 # xmm1 = mem[1,1,3,3]
 ; X64-SSSE3-NEXT:    xorps %xmm0, %xmm0
 ; X64-SSSE3-NEXT:    cmpeqss %xmm1, %xmm0
-; X64-SSSE3-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X64-SSSE3-NEXT:    movss {{.*}}(%rip), %xmm2 # xmm2 = mem[0],zero,zero,zero
 ; X64-SSSE3-NEXT:    andps %xmm0, %xmm2
 ; X64-SSSE3-NEXT:    andnps %xmm1, %xmm0
 ; X64-SSSE3-NEXT:    orps %xmm2, %xmm0
@@ -252,10 +252,10 @@ define float @PR43971_1(<8 x float> *%a0) nounwind {
 ;
 ; X64-AVX-LABEL: PR43971_1:
 ; X64-AVX:       # %bb.0: # %entry
-; X64-AVX-NEXT:    vmovshdup {{.*#+}} xmm0 = mem[1,1,3,3]
+; X64-AVX-NEXT:    vmovshdup (%rdi), %xmm0 # xmm0 = mem[1,1,3,3]
 ; X64-AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
 ; X64-AVX-NEXT:    vcmpeqss %xmm1, %xmm0, %xmm1
-; X64-AVX-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X64-AVX-NEXT:    vmovss {{.*}}(%rip), %xmm2 # xmm2 = mem[0],zero,zero,zero
 ; X64-AVX-NEXT:    vblendvps %xmm1, %xmm2, %xmm0, %xmm0
 ; X64-AVX-NEXT:    retq
 entry:
@@ -148,6 +148,12 @@ SCRUB_X86_SHUFFLES_RE = (
     re.compile(
         r'^(\s*\w+) [^#\n]+#+ ((?:[xyz]mm\d+|mem)( \{%k\d+\}( \{z\})?)? = .*)$',
         flags=re.M))
+
+SCRUB_X86_SHUFFLES_NO_MEM_RE = (
+    re.compile(
+        r'^(\s*\w+) [^#\n]+#+ ((?:[xyz]mm\d+|mem)( \{%k\d+\}( \{z\})?)? = (?!.*(?:mem)).*)$',
+        flags=re.M))
+
 SCRUB_X86_SPILL_RELOAD_RE = (
     re.compile(
         r'-?\d+\(%([er])[sb]p\)(.*(?:Spill|Reload))$',
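The only difference between the two patterns is the negative lookahead (?!.*(?:mem)) after the '=': the NO_MEM variant declines to match when the shuffle comment decodes a memory operand. A quick standalone check (again not part of the commit; the regex is copied from the hunk above, and the sample lines follow the style of the updated tests) confirms that register shuffles are still scrubbed even when the option is on:

import re

SCRUB_X86_SHUFFLES_NO_MEM_RE = re.compile(
    r'^(\s*\w+) [^#\n]+#+ ((?:[xyz]mm\d+|mem)( \{%k\d+\}( \{z\})?)? = (?!.*(?:mem)).*)$',
    flags=re.M)

reg_shuffle = ' shufps $255, %xmm1, %xmm0 # xmm0 = xmm0[3,3],xmm1[3,3]'
mem_shuffle = ' pshufd $78, (%rdi), %xmm0 # xmm0 = mem[2,3,0,1]'
print(SCRUB_X86_SHUFFLES_NO_MEM_RE.sub(r'\1 {{.*#+}} \2', reg_shuffle))
# -> ' shufps {{.*#+}} xmm0 = xmm0[3,3],xmm1[3,3]'    (register shuffle: still scrubbed)
print(SCRUB_X86_SHUFFLES_NO_MEM_RE.sub(r'\1 {{.*#+}} \2', mem_shuffle))
# -> ' pshufd $78, (%rdi), %xmm0 # xmm0 = mem[2,3,0,1]'  (memop shuffle: left intact)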
@@ -163,8 +169,13 @@ def scrub_asm_x86(asm, args):
   asm = common.SCRUB_WHITESPACE_RE.sub(r' ', asm)
   # Expand the tabs used for indentation.
   asm = string.expandtabs(asm, 2)
+
   # Detect shuffle asm comments and hide the operands in favor of the comments.
-  asm = SCRUB_X86_SHUFFLES_RE.sub(r'\1 {{.*#+}} \2', asm)
+  if getattr(args, 'no_x86_scrub_mem_shuffle', True):
+    asm = SCRUB_X86_SHUFFLES_NO_MEM_RE.sub(r'\1 {{.*#+}} \2', asm)
+  else:
+    asm = SCRUB_X86_SHUFFLES_RE.sub(r'\1 {{.*#+}} \2', asm)
+
   # Detect stack spills and reloads and hide their exact offset and whether
   # they used the stack pointer or frame pointer.
   asm = SCRUB_X86_SPILL_RELOAD_RE.sub(r'{{[-0-9]+}}(%\1{{[sb]}}p)\2', asm)
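Note how the plumbing above behaves for different callers: update_llc_test_checks.py defaults the flag to False (full scrubbing, so existing tests don't churn), while, as written, a caller whose args never define the attribute falls back to the getattr default of True and takes the reduced-scrub branch. A small sketch, assuming a bare argparse Namespace as a stand-in for each script's parsed args:

from argparse import Namespace

# Flag not passed: attribute is False, the else branch applies the full scrub.
args = Namespace(no_x86_scrub_mem_shuffle=False)
print(getattr(args, 'no_x86_scrub_mem_shuffle', True))  # False -> SCRUB_X86_SHUFFLES_RE

# --no_x86_scrub_mem_shuffle passed: attribute is True, memops stay visible.
args = Namespace(no_x86_scrub_mem_shuffle=True)
print(getattr(args, 'no_x86_scrub_mem_shuffle', True))  # True -> ..._NO_MEM_RE

# Caller without the attribute at all: getattr falls back to True.
print(getattr(Namespace(), 'no_x86_scrub_mem_shuffle', True))  # True

Regenerating a test with memop offsets kept would then look like update_llc_test_checks.py --no_x86_scrub_mem_shuffle <test>.ll, per the argparse change below.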
@@ -36,6 +36,9 @@ def main():
       help='Use more regex for x86 matching to reduce diffs between various subtargets')
   parser.add_argument(
       '--no_x86_scrub_rip', action='store_false', dest='x86_scrub_rip')
+  parser.add_argument(
+      '--no_x86_scrub_mem_shuffle', action='store_true', default=False,
+      help='Reduce scrubbing shuffles with memory operands')
   parser.add_argument('tests', nargs='+')
   args = common.parse_commandline_args(parser)