1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-25 04:02:41 +01:00
llvm-mirror/lib/Transforms
Fangrui Song 71ceca8ea9 [AlignmentFromAssumptions] getNewAlignmentDiff(): use getURemExpr()
The alignment is calculated incorrectly, so aligned mov instructions are sometimes not generated, as shown by the example below:

```
// b.cc
typedef long long index;

extern "C" index g_tid;
extern "C" index g_num;

void add3(float* __restrict__ a, float* __restrict__ b, float* __restrict__ c) {
    index n = 64*1024;
    index m = 16*1024;
    index k = 4*1024;
    index tid = g_tid;
    index num = g_num;
    __builtin_assume_aligned(a, 32);
    __builtin_assume_aligned(b, 32);
    __builtin_assume_aligned(c, 32);
    for (index i0=tid*k; i0<m; i0+=num*k)
        for (index i1=0; i1<n*m; i1+=m)
            for (index i2=0; i2<k; i2++)
                c[i1+i0+i2] = b[i0+i2] + a[i1+i0+i2];
}
```

Compile with `clang b.cc -Ofast -march=skylake -mavx2 -S`

```
vmovaps -224(%rdi,%rbx,4), %ymm0
vmovups -192(%rdi,%rbx,4), %ymm1         # should be vmovaps
vmovups -160(%rdi,%rbx,4), %ymm2         # should be vmovaps
vmovups -128(%rdi,%rbx,4), %ymm3         # should be vmovaps
vaddps  -224(%rsi,%rbx,4), %ymm0, %ymm0
vaddps  -192(%rsi,%rbx,4), %ymm1, %ymm1
vaddps  -160(%rsi,%rbx,4), %ymm2, %ymm2
vaddps  -128(%rsi,%rbx,4), %ymm3, %ymm3
vmovaps %ymm0, -224(%rdx,%rbx,4)
vmovups %ymm1, -192(%rdx,%rbx,4)         # should be vmovaps
vmovups %ymm2, -160(%rdx,%rbx,4)         # should be vmovaps
vmovups %ymm3, -128(%rdx,%rbx,4)         # should be vmovaps
```

Differential Revision: https://reviews.llvm.org/D66575
Patch by Dun Liang

llvm-svn: 369723
2019-08-23 02:17:04 +00:00
..
AggressiveInstCombine
Coroutines [llvm] Migrate llvm::make_unique to std::make_unique 2019-08-15 15:54:37 +00:00
Hello
InstCombine [instcombine] icmp eq/ne (sub C, Y), C -> icmp eq/ne Y, 0 2019-08-21 15:51:57 +00:00
Instrumentation hwasan: Untag unwound stack frames by wrapping personality functions. 2019-08-23 01:28:44 +00:00
IPO IR. Change strip* family of functions to not look through aliases. 2019-08-22 19:56:14 +00:00
ObjCARC [ObjC][ARC] Delete ObjC runtime calls on global variables annotated with 'objc_arc_inert' 2019-06-14 22:06:32 +00:00
Scalar [AlignmentFromAssumptions] getNewAlignmentDiff(): use getURemExpr() 2019-08-23 02:17:04 +00:00
Utils [Loop Peeling] Fix silly bug in metadata update. 2019-08-22 10:06:46 +00:00
Vectorize [SLP][NFC] Avoid repetitive calls to getSameOpcode() 2019-08-20 00:22:04 +00:00
CMakeLists.txt
LLVMBuild.txt