1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-26 04:32:44 +01:00

Revert "Temporarily Revert "Add basic loop fusion pass.""

The reversion apparently deleted the test/Transforms directory.

Will be re-reverting again.

llvm-svn: 358552
This commit is contained in:
Eric Christopher 2019-04-17 04:52:47 +00:00
parent d4ad193fba
commit a62270de2c
4868 changed files with 575521 additions and 0 deletions

View File

@ -219,6 +219,7 @@ void initializeLoopDeletionLegacyPassPass(PassRegistry&);
void initializeLoopDistributeLegacyPass(PassRegistry&);
void initializeLoopExtractorPass(PassRegistry&);
void initializeLoopGuardWideningLegacyPassPass(PassRegistry&);
void initializeLoopFuseLegacyPass(PassRegistry&);
void initializeLoopIdiomRecognizeLegacyPassPass(PassRegistry&);
void initializeLoopInfoWrapperPassPass(PassRegistry&);
void initializeLoopInstSimplifyLegacyPassPass(PassRegistry&);

View File

@ -458,6 +458,12 @@ FunctionPass *createNaryReassociatePass();
//
FunctionPass *createLoopDistributePass();
//===----------------------------------------------------------------------===//
//
// LoopFuse - Fuse loops.
//
FunctionPass *createLoopFusePass();
//===----------------------------------------------------------------------===//
//
// LoopLoadElimination - Perform loop-aware load elimination.

View File

@ -0,0 +1,30 @@
//===- LoopFuse.h - Loop Fusion Pass ----------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file declares the Loop Fusion pass.
///
//===----------------------------------------------------------------------===//
#ifndef LLVM_TRANSFORMS_SCALAR_LOOPFUSE_H
#define LLVM_TRANSFORMS_SCALAR_LOOPFUSE_H
#include "llvm/IR/PassManager.h"
namespace llvm {
class Function;
/// Loop Fusion pass, exposed as a PassInfoMixin-based pass that operates on a
/// single Function at a time. Only the interface is declared here; the body of
/// run() is defined out of line.
class LoopFusePass : public PassInfoMixin<LoopFusePass> {
public:
/// Runs the pass on \p F.
///
/// \param F  the function to transform.
/// \param AM the function analysis manager supplied by the caller
///           (presumably used to query loop analyses -- confirm in the
///           implementation).
/// \returns a PreservedAnalyses value describing the result of the run.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
} // end namespace llvm
#endif // LLVM_TRANSFORMS_SCALAR_LOOPFUSE_H

View File

@ -122,6 +122,7 @@
#include "llvm/Transforms/Scalar/LoopDataPrefetch.h"
#include "llvm/Transforms/Scalar/LoopDeletion.h"
#include "llvm/Transforms/Scalar/LoopDistribute.h"
#include "llvm/Transforms/Scalar/LoopFuse.h"
#include "llvm/Transforms/Scalar/LoopIdiomRecognize.h"
#include "llvm/Transforms/Scalar/LoopInstSimplify.h"
#include "llvm/Transforms/Scalar/LoopLoadElimination.h"

View File

@ -197,6 +197,7 @@ FUNCTION_PASS("partially-inline-libcalls", PartiallyInlineLibCallsPass())
FUNCTION_PASS("lcssa", LCSSAPass())
FUNCTION_PASS("loop-data-prefetch", LoopDataPrefetchPass())
FUNCTION_PASS("loop-load-elim", LoopLoadEliminationPass())
FUNCTION_PASS("loop-fuse", LoopFusePass())
FUNCTION_PASS("loop-distribute", LoopDistributePass())
FUNCTION_PASS("loop-vectorize", LoopVectorizePass())
FUNCTION_PASS("pgo-memop-opt", PGOMemOPSizeOpt())

View File

@ -28,6 +28,7 @@ add_llvm_library(LLVMScalarOpts
LoopDeletion.cpp
LoopDataPrefetch.cpp
LoopDistribute.cpp
LoopFuse.cpp
LoopIdiomRecognize.cpp
LoopInstSimplify.cpp
LoopInterchange.cpp

File diff suppressed because it is too large Load Diff

View File

@ -62,6 +62,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeJumpThreadingPass(Registry);
initializeLegacyLICMPassPass(Registry);
initializeLegacyLoopSinkPassPass(Registry);
initializeLoopFuseLegacyPass(Registry);
initializeLoopDataPrefetchLegacyPassPass(Registry);
initializeLoopDeletionLegacyPassPass(Registry);
initializeLoopAccessLegacyAnalysisPass(Registry);

View File

@ -0,0 +1,11 @@
; RUN: opt < %s -adce
define i32 @"main"(i32 %argc) {
br label %2
%retval = phi i32 [ %argc, %2 ] ; <i32> [#uses=2]
%two = add i32 %retval, %retval ; <i32> [#uses=1]
ret i32 %two
br label %1
}

View File

@ -0,0 +1,16 @@
; It is illegal to remove BB1 because it will mess up the PHI node!
;
; RUN: opt < %s -adce -S | grep BB1
define i32 @test(i1 %C, i32 %A, i32 %B) {
; <label>:0
br i1 %C, label %BB1, label %BB2
BB1: ; preds = %0
br label %BB2
BB2: ; preds = %BB1, %0
%R = phi i32 [ %A, %0 ], [ %B, %BB1 ] ; <i32> [#uses=1]
ret i32 %R
}

View File

@ -0,0 +1,35 @@
; This testcase contains an entire loop that should be removed. The only thing
; left is the store instruction in BB0. The problem this testcase was running
; into was that when the reg109 PHI was getting zero predecessors, it was
; removed even though there were uses still around. Now the uses are filled
; in with a dummy value before the PHI is deleted.
;
; RUN: opt < %s -S -adce | grep bb1
; RUN: opt < %s -S -adce -adce-remove-loops | FileCheck %s
%node_t = type { double*, %node_t*, %node_t**, double**, double*, i32, i32 }
define void @localize_local(%node_t* %nodelist) {
bb0:
%nodelist.upgrd.1 = alloca %node_t* ; <%node_t**> [#uses=2]
store %node_t* %nodelist, %node_t** %nodelist.upgrd.1
br label %bb1
bb1: ; preds = %bb0
%reg107 = load %node_t*, %node_t** %nodelist.upgrd.1 ; <%node_t*> [#uses=2]
%cond211 = icmp eq %node_t* %reg107, null ; <i1> [#uses=1]
; CHECK: br label %bb3
br i1 %cond211, label %bb3, label %bb2
bb2: ; preds = %bb2, %bb1
%reg109 = phi %node_t* [ %reg110, %bb2 ], [ %reg107, %bb1 ] ; <%node_t*> [#uses=1]
%reg212 = getelementptr %node_t, %node_t* %reg109, i64 0, i32 1 ; <%node_t**> [#uses=1]
%reg110 = load %node_t*, %node_t** %reg212 ; <%node_t*> [#uses=2]
%cond213 = icmp ne %node_t* %reg110, null ; <i1> [#uses=1]
; CHECK: br label %bb3
br i1 %cond213, label %bb2, label %bb3
bb3: ; preds = %bb2, %bb1
ret void
}

View File

@ -0,0 +1,19 @@
; This testcase is a distilled form of: 2002-05-28-Crash.ll
; RUN: opt < %s -adce
; RUN: opt < %s -adce -adce-remove-loops -S | FileCheck %s
define float @test(i32 %i) {
%F = sitofp i32 %i to float ; <float> [#uses=1]
%I = bitcast i32 %i to i32 ; <i32> [#uses=1]
br label %Loop
Loop: ; preds = %Loop, %0
%B = icmp ne i32 %I, 0 ; <i1> [#uses=1]
; CHECK: br label %Out
br i1 %B, label %Out, label %Loop
Out: ; preds = %Loop
ret float %F
}

View File

@ -0,0 +1,56 @@
; This testcase is distilled from the GNU rx package. The loop should be
; removed but causes a problem when ADCE does. The source function is:
; int rx_bitset_empty (int size, rx_Bitset set) {
; int x;
; RX_subset s;
; s = set[0];
; set[0] = 1;
; for (x = rx_bitset_numb_subsets(size) - 1; !set[x]; --x)
; ;
; set[0] = s;
; return !s;
;}
;
; RUN: opt < %s -adce
; RUN: opt < %s -adce -adce-remove-loops -S | FileCheck %s
define i32 @rx_bitset_empty(i32 %size, i32* %set) {
bb1:
%reg110 = load i32, i32* %set ; <i32> [#uses=2]
store i32 1, i32* %set
%cast112 = sext i32 %size to i64 ; <i64> [#uses=1]
%reg113 = add i64 %cast112, 31 ; <i64> [#uses=1]
%reg114 = lshr i64 %reg113, 5 ; <i64> [#uses=2]
%cast109 = trunc i64 %reg114 to i32 ; <i32> [#uses=1]
%reg129 = add i32 %cast109, -1 ; <i32> [#uses=1]
%reg114-idxcast = trunc i64 %reg114 to i32 ; <i32> [#uses=1]
%reg114-idxcast-offset = add i32 %reg114-idxcast, 1073741823 ; <i32> [#uses=1]
%reg114-idxcast-offset.upgrd.1 = zext i32 %reg114-idxcast-offset to i64 ; <i64> [#uses=1]
%reg124 = getelementptr i32, i32* %set, i64 %reg114-idxcast-offset.upgrd.1 ; <i32*> [#uses=1]
%reg125 = load i32, i32* %reg124 ; <i32> [#uses=1]
%cond232 = icmp ne i32 %reg125, 0 ; <i1> [#uses=1]
; CHECK: br label %bb3
br i1 %cond232, label %bb3, label %bb2
bb2: ; preds = %bb2, %bb1
%cann-indvar = phi i32 [ 0, %bb1 ], [ %add1-indvar, %bb2 ] ; <i32> [#uses=2]
%reg130-scale = mul i32 %cann-indvar, -1 ; <i32> [#uses=1]
%reg130 = add i32 %reg130-scale, %reg129 ; <i32> [#uses=1]
%add1-indvar = add i32 %cann-indvar, 1 ; <i32> [#uses=1]
%reg130-idxcast = bitcast i32 %reg130 to i32 ; <i32> [#uses=1]
%reg130-idxcast-offset = add i32 %reg130-idxcast, 1073741823 ; <i32> [#uses=1]
%reg130-idxcast-offset.upgrd.2 = zext i32 %reg130-idxcast-offset to i64 ; <i64> [#uses=1]
%reg118 = getelementptr i32, i32* %set, i64 %reg130-idxcast-offset.upgrd.2 ; <i32*> [#uses=1]
%reg119 = load i32, i32* %reg118 ; <i32> [#uses=1]
%cond233 = icmp eq i32 %reg119, 0 ; <i1> [#uses=1]
br i1 %cond233, label %bb2, label %bb3
bb3: ; preds = %bb2, %bb1
store i32 %reg110, i32* %set
%cast126 = zext i32 %reg110 to i64 ; <i64> [#uses=1]
%reg127 = add i64 %cast126, -1 ; <i64> [#uses=1]
%reg128 = lshr i64 %reg127, 63 ; <i64> [#uses=1]
%cast120 = trunc i64 %reg128 to i32 ; <i32> [#uses=1]
ret i32 %cast120
}

View File

@ -0,0 +1,14 @@
; This testcase fails because ADCE does not correctly delete the chain of
; three instructions that are dead here. Ironically, if there were a dead basic
; block in this function, it would work fine, but that would be the part we
; have to fix now, wouldn't it....
;
; RUN: opt < %s -adce -S | FileCheck %s
define void @foo(i8* %reg5481) {
%cast611 = bitcast i8* %reg5481 to i8** ; <i8**> [#uses=1]
%reg162 = load i8*, i8** %cast611 ; <i8*> [#uses=1]
; CHECK-NOT: ptrtoint
ptrtoint i8* %reg162 to i32 ; <i32>:1 [#uses=0]
ret void
}

View File

@ -0,0 +1,50 @@
; This testcase was extracted from the gzip SPEC benchmark
;
; RUN: opt < %s -adce | FileCheck %s
@bk = external global i32 ; <i32*> [#uses=2]
@hufts = external global i32 ; <i32*> [#uses=1]
define i32 @inflate() {
bb0:
br label %bb2
bb2: ; preds = %bb6, %bb0
%reg128 = phi i32 [ %reg130, %bb6 ], [ 0, %bb0 ] ; <i32> [#uses=2]
br i1 true, label %bb4, label %bb3
bb3: ; preds = %bb2
br label %UnifiedExitNode
; CHECK-NOT: bb4:
; CHECK-NOT: bb5:
bb4: ; preds = %bb2
%reg117 = load i32, i32* @hufts ; <i32> [#uses=2]
%cond241 = icmp ule i32 %reg117, %reg128 ; <i1> [#uses=1]
br i1 %cond241, label %bb6, label %bb5
bb5: ; preds = %bb4
br label %bb6
bb6: ; preds = %bb5, %bb4
%reg130 = phi i32 [ %reg117, %bb5 ], [ %reg128, %bb4 ] ; <i32> [#uses=1]
br i1 false, label %bb2, label %bb7
bb7: ; preds = %bb6
%reg126 = load i32, i32* @bk ; <i32> [#uses=1]
%cond247 = icmp ule i32 %reg126, 7 ; <i1> [#uses=1]
br i1 %cond247, label %bb9, label %bb8
bb8: ; preds = %bb8, %bb7
%reg119 = load i32, i32* @bk ; <i32> [#uses=1]
%cond256 = icmp ugt i32 %reg119, 7 ; <i1> [#uses=1]
br i1 %cond256, label %bb8, label %bb9
bb9: ; preds = %bb8, %bb7
br label %UnifiedExitNode
UnifiedExitNode: ; preds = %bb9, %bb3
%UnifiedRetVal = phi i32 [ 7, %bb3 ], [ 0, %bb9 ] ; <i32> [#uses=1]
ret i32 %UnifiedRetVal
}

View File

@ -0,0 +1,10 @@
; RUN: opt < %s -adce -disable-output
; RUN: opt < %s -adce -disable-output -adce-remove-loops
; The function is nothing but an unconditional self-loop on %BB3 with no
; return. The RUN lines discard the output, so this test only requires that
; opt completes successfully on such input.
define void @test() {
br label %BB3
BB3: ; preds = %BB3, %0
br label %BB3
}

View File

@ -0,0 +1,30 @@
; Testcase reduced from 197.parser by bugpoint
; RUN: opt < %s -adce
; RUN: opt < %s -adce -adce-remove-loops -S | FileCheck %s
define void @conjunction_prune() {
; <label>:0
br label %bb19
bb19: ; preds = %bb23, %bb22, %0
%reg205 = phi i8* [ null, %bb22 ], [ null, %bb23 ], [ null, %0 ] ; <i8*> [#uses=1]
; CHECK: br label %bb22
br i1 false, label %bb21, label %bb22
bb21: ; preds = %bb19
%cast455 = bitcast i8* %reg205 to i8** ; <i8**> [#uses=0]
; CHECK: br label %bb22
br label %bb22
bb22: ; preds = %bb21, %bb19
; CHECK: br label %bb23
br i1 false, label %bb19, label %bb23
bb23: ; preds = %bb22
; CHECK: br label %bb28
br i1 false, label %bb19, label %bb28
bb28: ; preds = %bb23
ret void
}

View File

@ -0,0 +1,37 @@
; This testcase caused an assertion failure because a PHI node did not have
; entries for its postdominator. But I think this can only happen when the
; PHI node is dead, so we just avoid patching up dead PHI nodes.
; RUN: opt < %s -adce -S | FileCheck %s
; RUN: opt < %s -adce -adce-remove-loops -S | FileCheck %s
target datalayout = "e-p:32:32"
define void @dead_test8() {
entry:
br label %loopentry
loopentry: ; preds = %endif, %entry
%k.1 = phi i32 [ %k.0, %endif ], [ 0, %entry ] ; <i32> [#uses=1]
br i1 false, label %no_exit, label %return
no_exit: ; preds = %loopentry
; CHECK: br label %then
br i1 false, label %then, label %else
then: ; preds = %no_exit
br label %endif
else: ; preds = %no_exit
%dec = add i32 %k.1, -1 ; <i32> [#uses=1]
br label %endif
endif: ; preds = %else, %then
%k.0 = phi i32 [ %dec, %else ], [ 0, %then ] ; <i32> [#uses=1]
store i32 2, i32* null
br label %loopentry
return: ; preds = %loopentry
ret void
}

View File

@ -0,0 +1,29 @@
; RUN: opt < %s -adce -disable-output
; RUN: opt < %s -adce -adce-remove-loops -disable-output
@G = external global i32* ; <i32**> [#uses=1]
declare void @Fn(i32*)
define i32 @main(i32 %argc.1, i8** %argv.1) {
entry:
br label %endif.42
endif.42: ; preds = %shortcirc_done.12, %then.66, %endif.42, %entry
br i1 false, label %endif.65, label %endif.42
then.66: ; preds = %shortcirc_done.12
call void @Fn( i32* %tmp.2846 )
br label %endif.42
endif.65: ; preds = %endif.42
%tmp.2846 = load i32*, i32** @G ; <i32*> [#uses=1]
br i1 false, label %shortcirc_next.12, label %shortcirc_done.12
shortcirc_next.12: ; preds = %endif.65
br label %shortcirc_done.12
shortcirc_done.12: ; preds = %shortcirc_next.12, %endif.65
br i1 false, label %then.66, label %endif.42
}

View File

@ -0,0 +1,94 @@
; RUN: opt < %s -adce -disable-output
; RUN: opt < %s -adce -adce-remove-loops=true -disable-output
target datalayout = "e-p:32:32"
%struct..CppObjTypeDesc = type { i32, i16, i16 }
%struct..TypeToken = type { i32, i16, i16 }
define i32 @C_ReFaxToDb() {
entry:
br i1 false, label %endif.0, label %then.0
then.0: ; preds = %entry
ret i32 0
endif.0: ; preds = %entry
br i1 false, label %then.11, label %then.4
then.4: ; preds = %endif.0
ret i32 0
then.11: ; preds = %endif.0
br i1 false, label %loopentry.0, label %else.2
loopentry.0: ; preds = %loopentry.1, %endif.14, %then.11
br i1 false, label %endif.14, label %loopexit.0
endif.14: ; preds = %loopentry.0
br i1 false, label %loopentry.1, label %loopentry.0
loopentry.1: ; preds = %then.53, %endif.14
%SubArrays.10 = phi i32* [ %SubArrays.8, %then.53 ], [ null, %endif.14 ] ; <i32*> [#uses=3]
br i1 false, label %no_exit.1, label %loopentry.0
no_exit.1: ; preds = %loopentry.1
; CHECK: switch
switch i32 0, label %label.17 [
i32 2, label %label.11
i32 19, label %label.10
]
label.10: ; preds = %no_exit.1
br i1 false, label %then.43, label %endif.43
then.43: ; preds = %label.10
br i1 false, label %then.44, label %endif.44
then.44: ; preds = %then.43
br i1 false, label %shortcirc_next.4, label %endif.45
shortcirc_next.4: ; preds = %then.44
br i1 false, label %no_exit.2, label %loopexit.2
no_exit.2: ; preds = %shortcirc_next.4
%tmp.897 = getelementptr i32, i32* %SubArrays.10, i64 0 ; <i32*> [#uses=1]
%tmp.899 = load i32, i32* %tmp.897 ; <i32> [#uses=1]
store i32 %tmp.899, i32* null
ret i32 0
loopexit.2: ; preds = %shortcirc_next.4
ret i32 0
endif.45: ; preds = %then.44
ret i32 0
endif.44: ; preds = %then.43
ret i32 0
endif.43: ; preds = %label.10
ret i32 0
label.11: ; preds = %no_exit.1
ret i32 0
label.17: ; preds = %no_exit.1
br i1 false, label %then.53, label %shortcirc_next.7
shortcirc_next.7: ; preds = %label.17
br i1 false, label %then.53, label %shortcirc_next.8
shortcirc_next.8: ; preds = %shortcirc_next.7
ret i32 0
then.53: ; preds = %shortcirc_next.7, %label.17
%SubArrays.8 = phi i32* [ %SubArrays.10, %shortcirc_next.7 ], [ %SubArrays.10, %label.17 ] ; <i32*> [#uses=1]
%tmp.1023 = load i32, i32* null ; <i32> [#uses=1]
switch i32 %tmp.1023, label %loopentry.1 [
]
loopexit.0: ; preds = %loopentry.0
ret i32 0
else.2: ; preds = %then.11
ret i32 0
}

View File

@ -0,0 +1,44 @@
; RUN: opt < %s -adce -S | FileCheck %s
; RUN: opt < %s -adce -adce-remove-loops -S | FileCheck %s
define void @dead_test8(i32* %data.1, i32 %idx.1) {
entry:
%tmp.1 = load i32, i32* %data.1 ; <i32> [#uses=2]
%tmp.41 = icmp sgt i32 %tmp.1, 0 ; <i1> [#uses=1]
br i1 %tmp.41, label %no_exit.preheader, label %return
no_exit.preheader: ; preds = %entry
%tmp.11 = getelementptr i32, i32* %data.1, i64 1 ; <i32*> [#uses=1]
%tmp.22-idxcast = sext i32 %idx.1 to i64 ; <i64> [#uses=1]
%tmp.28 = getelementptr i32, i32* %data.1, i64 %tmp.22-idxcast ; <i32*> [#uses=1]
br label %no_exit
no_exit: ; preds = %endif, %no_exit.preheader
%k.1 = phi i32 [ %k.0, %endif ], [ 0, %no_exit.preheader ] ; <i32> [#uses=3]
%i.0 = phi i32 [ %inc.1, %endif ], [ 0, %no_exit.preheader ] ; <i32> [#uses=1]
%tmp.12 = load i32, i32* %tmp.11 ; <i32> [#uses=1]
%tmp.14 = sub i32 0, %tmp.12 ; <i32> [#uses=1]
; CHECK-NOT: %tmp.161
%tmp.161 = icmp ne i32 %k.1, %tmp.14 ; <i1> [#uses=1]
; CHECK: br label %then
br i1 %tmp.161, label %then, label %else
then: ; preds = %no_exit
%inc.0 = add i32 %k.1, 1 ; <i32> [#uses=1]
br label %endif
else: ; preds = %no_exit
%dec = add i32 %k.1, -1 ; <i32> [#uses=1]
br label %endif
endif: ; preds = %else, %then
%k.0 = phi i32 [ %dec, %else ], [ %inc.0, %then ] ; <i32> [#uses=1]
store i32 2, i32* %tmp.28
%inc.1 = add i32 %i.0, 1 ; <i32> [#uses=2]
%tmp.4 = icmp slt i32 %inc.1, %tmp.1 ; <i1> [#uses=1]
br i1 %tmp.4, label %no_exit, label %return
return: ; preds = %endif, %entry
ret void
}

View File

@ -0,0 +1,22 @@
; RUN: opt < %s -adce -disable-output
define void @test() personality i32 (...)* @__gxx_personality_v0 {
br i1 false, label %then, label %endif
then: ; preds = %0
invoke void null( i8* null )
to label %invoke_cont unwind label %invoke_catch
invoke_catch: ; preds = %then
%exn = landingpad {i8*, i32}
cleanup
resume { i8*, i32 } %exn
invoke_cont: ; preds = %then
ret void
endif: ; preds = %0
ret void
}
declare i32 @__gxx_personality_v0(...)

View File

@ -0,0 +1,10 @@
; RUN: opt < %s -adce -disable-output
; RUN: opt < %s -adce -adce-remove-loops -disable-output
define i32 @main() {
br label %loop
loop: ; preds = %loop, %0
br label %loop
}

View File

@ -0,0 +1,20 @@
; RUN: opt < %s -adce -simplifycfg -S | grep call
; RUN: opt < %s -adce -adce-remove-loops -simplifycfg -S | grep call
declare void @exit(i32)
define i32 @main(i32 %argc) {
%C = icmp eq i32 %argc, 1 ; <i1> [#uses=2]
br i1 %C, label %Cond, label %Done
Cond: ; preds = %0
br i1 %C, label %Loop, label %Done
Loop: ; preds = %Loop, %Cond
call void @exit( i32 0 )
br label %Loop
Done: ; preds = %Cond, %0
ret i32 1
}

View File

@ -0,0 +1,17 @@
; RUN: opt < %s -adce -disable-output
; RUN: opt < %s -adce -adce-remove-loops -disable-output
define void @test() {
entry:
br label %UnifiedReturnBlock
UnifiedReturnBlock: ; preds = %invoke_catch.0, %entry
ret void
invoke_catch.0: ; No predecessors!
br i1 false, label %UnifiedUnwindBlock, label %UnifiedReturnBlock
UnifiedUnwindBlock: ; preds = %invoke_catch.0
unreachable
}

View File

@ -0,0 +1,52 @@
; RUN: opt < %s -adce -disable-output
declare void @strlen()
declare void @_ZN10QByteArray6resizeEi()
declare void @q_atomic_decrement()
define void @_ZNK10QByteArray13leftJustifiedEicb() personality i32 (...)* @__gxx_personality_v0 {
entry:
invoke void @strlen( )
to label %tmp.3.i.noexc unwind label %invoke_catch.0
tmp.3.i.noexc: ; preds = %entry
br i1 false, label %then.0, label %else.0
invoke_catch.0: ; preds = %entry
%exn.0 = landingpad {i8*, i32}
cleanup
invoke void @q_atomic_decrement( )
to label %tmp.1.i.i183.noexc unwind label %terminate
tmp.1.i.i183.noexc: ; preds = %invoke_catch.0
ret void
then.0: ; preds = %tmp.3.i.noexc
invoke void @_ZN10QByteArray6resizeEi( )
to label %invoke_cont.1 unwind label %invoke_catch.1
invoke_catch.1: ; preds = %then.0
%exn.1 = landingpad {i8*, i32}
cleanup
invoke void @q_atomic_decrement( )
to label %tmp.1.i.i162.noexc unwind label %terminate
tmp.1.i.i162.noexc: ; preds = %invoke_catch.1
ret void
invoke_cont.1: ; preds = %then.0
ret void
else.0: ; preds = %tmp.3.i.noexc
ret void
terminate: ; preds = %invoke_catch.1, %invoke_catch.0
%dbg.0.1 = phi { }* [ null, %invoke_catch.1 ], [ null, %invoke_catch.0 ] ; <{ }*> [#uses=0]
%exn = landingpad {i8*, i32}
cleanup
unreachable
}
declare i32 @__gxx_personality_v0(...)

View File

@ -0,0 +1,55 @@
; RUN: opt < %s -sroa -adce -adce-remove-loops -S | FileCheck %s
; ModuleID = 'test1.bc'
source_filename = "test1.c"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; Function Attrs: nounwind uwtable
define i32 @foo(i32, i32, i32) #0 {
%4 = alloca i32, align 4
%5 = alloca i32, align 4
%6 = alloca i32, align 4
%7 = alloca i32, align 4
%8 = alloca i32, align 4
store i32 %0, i32* %4, align 4
store i32 %1, i32* %5, align 4
store i32 %2, i32* %6, align 4
store i32 0, i32* %7, align 4
%9 = load i32, i32* %5, align 4
%I10 = icmp ne i32 %9, 0
br i1 %I10, label %B11, label %B21
B11:
store i32 0, i32* %8, align 4
br label %B12
B12:
%I13 = load i32, i32* %8, align 4
%I14 = load i32, i32* %6, align 4
%I15 = icmp slt i32 %I13, %I14
; CHECK: br label %B20
br i1 %I15, label %B16, label %B20
B16:
br label %B17
B17:
%I18 = load i32, i32* %8, align 4
%I19 = add nsw i32 %I18, 1
store i32 %I19, i32* %8, align 4
br label %B12
B20:
store i32 1, i32* %7, align 4
br label %B21
B21:
%I22 = load i32, i32* %7, align 4
ret i32 %I22
}
attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
!llvm.ident = !{!0}
!0 = !{!"clang version 4.0.0"}

View File

@ -0,0 +1,24 @@
; RUN: opt < %s -adce | llvm-dis
; RUN: opt < %s -adce -verify-dom-info | llvm-dis
define void @foo() {
entry:
br label %switch
switch: ; preds = %entry
switch i32 undef, label %default [
i32 2, label %two
i32 5, label %five
i32 4, label %four
]
four: ; preds = %switch
br label %exit
five: ; preds = %switch
br label %exit
two: ; preds = %switch
br label %exit
default: ; preds = %switch
br label %exit
exit: ; preds = %default, %two, %five, %four
ret void
}

View File

@ -0,0 +1,18 @@
; RUN: opt < %s -adce -simplifycfg | llvm-dis
; RUN: opt < %s -passes=adce | llvm-dis
define i32 @Test(i32 %A, i32 %B) {
BB1:
br label %BB4
BB2: ; No predecessors!
br label %BB3
BB3: ; preds = %BB4, %BB2
%ret = phi i32 [ %X, %BB4 ], [ %B, %BB2 ] ; <i32> [#uses=1]
ret i32 %ret
BB4: ; preds = %BB1
%X = phi i32 [ %A, %BB1 ] ; <i32> [#uses=1]
br label %BB3
}

View File

@ -0,0 +1,102 @@
; RUN: opt < %s -adce -S | FileCheck %s
; RUN: opt < %s -adce -adce-remove-loops -S | FileCheck %s
%FILE = type { i32, i8*, i8*, i8, i8, i32, i32, i32 }
%spec_fd_t = type { i32, i32, i32, i8* }
@__iob = external global [20 x %FILE] ; <[20 x %FILE]*> [#uses=1]
@dbglvl = global i32 4 ; <i32*> [#uses=3]
@spec_fd = external global [3 x %spec_fd_t] ; <[3 x %spec_fd_t]*> [#uses=4]
@.LC9 = internal global [34 x i8] c"spec_read: fd=%d, > MAX_SPEC_FD!\0A\00" ; <[34 x i8]*> [#uses=1]
@.LC10 = internal global [4 x i8] c"EOF\00" ; <[4 x i8]*> [#uses=1]
@.LC11 = internal global [4 x i8] c"%d\0A\00" ; <[4 x i8]*> [#uses=1]
@.LC12 = internal global [17 x i8] c"spec_getc: %d = \00" ; <[17 x i8]*> [#uses=1]
declare i32 @fprintf(%FILE*, i8*, ...)
declare void @exit(i32)
declare i32 @remove(i8*)
declare i32 @fputc(i32, %FILE*)
declare i32 @fwrite(i8*, i32, i32, %FILE*)
declare void @perror(i8*)
define i32 @spec_getc(i32 %fd) {
%reg109 = load i32, i32* @dbglvl ; <i32> [#uses=1]
%cond266 = icmp sle i32 %reg109, 4 ; <i1> [#uses=1]
; CHECK: br label %bb3
br i1 %cond266, label %bb3, label %bb2
bb2: ; preds = %0
%cast273 = getelementptr [17 x i8], [17 x i8]* @.LC12, i64 0, i64 0 ; <i8*> [#uses=0]
br label %bb3
bb3: ; preds = %bb2, %0
%cond267 = icmp sle i32 %fd, 3 ; <i1> [#uses=1]
br i1 %cond267, label %bb5, label %bb4
bb4: ; preds = %bb3
%reg111 = getelementptr [20 x %FILE], [20 x %FILE]* @__iob, i64 0, i64 1, i32 3 ; <i8*> [#uses=1]
%cast274 = getelementptr [34 x i8], [34 x i8]* @.LC9, i64 0, i64 0 ; <i8*> [#uses=0]
%cast282 = bitcast i8* %reg111 to %FILE* ; <%FILE*> [#uses=0]
call void @exit( i32 1 )
br label %UnifiedExitNode
bb5: ; preds = %bb3
%reg107-idxcast1 = sext i32 %fd to i64 ; <i64> [#uses=2]
%reg107-idxcast2 = sext i32 %fd to i64 ; <i64> [#uses=1]
%reg1311 = getelementptr [3 x %spec_fd_t], [3 x %spec_fd_t]* @spec_fd, i64 0, i64 %reg107-idxcast2 ; <%spec_fd_t*> [#uses=1]
%idx1 = getelementptr [3 x %spec_fd_t], [3 x %spec_fd_t]* @spec_fd, i64 0, i64 %reg107-idxcast1, i32 2 ; <i32*> [#uses=1]
%reg1321 = load i32, i32* %idx1 ; <i32> [#uses=3]
%idx2 = getelementptr %spec_fd_t, %spec_fd_t* %reg1311, i64 0, i32 1 ; <i32*> [#uses=1]
%reg1331 = load i32, i32* %idx2 ; <i32> [#uses=1]
%cond270 = icmp slt i32 %reg1321, %reg1331 ; <i1> [#uses=1]
br i1 %cond270, label %bb9, label %bb6
bb6: ; preds = %bb5
%reg134 = load i32, i32* @dbglvl ; <i32> [#uses=1]
%cond271 = icmp sle i32 %reg134, 4 ; <i1> [#uses=1]
; CHECK: br label %bb8
br i1 %cond271, label %bb8, label %bb7
bb7: ; preds = %bb6
%cast277 = getelementptr [4 x i8], [4 x i8]* @.LC10, i64 0, i64 0 ; <i8*> [#uses=0]
br label %bb8
bb8: ; preds = %bb7, %bb6
br label %UnifiedExitNode
bb9: ; preds = %bb5
%reg107-idxcast3 = sext i32 %fd to i64 ; <i64> [#uses=1]
%idx3 = getelementptr [3 x %spec_fd_t], [3 x %spec_fd_t]* @spec_fd, i64 0, i64 %reg107-idxcast3, i32 3 ; <i8**> [#uses=1]
%reg1601 = load i8*, i8** %idx3 ; <i8*> [#uses=1]
%reg132-idxcast1 = sext i32 %reg1321 to i64 ; <i64> [#uses=1]
%idx4 = getelementptr i8, i8* %reg1601, i64 %reg132-idxcast1 ; <i8*> [#uses=1]
%reg1621 = load i8, i8* %idx4 ; <i8> [#uses=2]
%cast108 = zext i8 %reg1621 to i64 ; <i64> [#uses=0]
%reg157 = add i32 %reg1321, 1 ; <i32> [#uses=1]
%idx5 = getelementptr [3 x %spec_fd_t], [3 x %spec_fd_t]* @spec_fd, i64 0, i64 %reg107-idxcast1, i32 2 ; <i32*> [#uses=1]
store i32 %reg157, i32* %idx5
%reg163 = load i32, i32* @dbglvl ; <i32> [#uses=1]
%cond272 = icmp sle i32 %reg163, 4 ; <i1> [#uses=1]
; CHECK: br label %bb11
br i1 %cond272, label %bb11, label %bb10
bb10: ; preds = %bb9
%cast279 = getelementptr [4 x i8], [4 x i8]* @.LC11, i64 0, i64 0 ; <i8*> [#uses=0]
br label %bb11
bb11: ; preds = %bb10, %bb9
%cast291 = zext i8 %reg1621 to i32 ; <i32> [#uses=1]
br label %UnifiedExitNode
UnifiedExitNode: ; preds = %bb11, %bb8, %bb4
%UnifiedRetVal = phi i32 [ 42, %bb4 ], [ -1, %bb8 ], [ %cast291, %bb11 ] ; <i32> [#uses=1]
ret i32 %UnifiedRetVal
}
declare i32 @puts(i8*)
declare i32 @printf(i8*, ...)

View File

@ -0,0 +1,102 @@
; RUN: opt < %s -adce -disable-output
; RUN: opt < %s -adce -adce-remove-loops -S | FileCheck %s
%FILE = type { i32, i8*, i8*, i8, i8, i32, i32, i32 }
%spec_fd_t = type { i32, i32, i32, i8* }
@__iob = external global [20 x %FILE] ; <[20 x %FILE]*> [#uses=1]
@dbglvl = global i32 4 ; <i32*> [#uses=3]
@spec_fd = external global [3 x %spec_fd_t] ; <[3 x %spec_fd_t]*> [#uses=4]
@.LC9 = internal global [34 x i8] c"spec_read: fd=%d, > MAX_SPEC_FD!\0A\00" ; <[34 x i8]*> [#uses=1]
@.LC10 = internal global [4 x i8] c"EOF\00" ; <[4 x i8]*> [#uses=1]
@.LC11 = internal global [4 x i8] c"%d\0A\00" ; <[4 x i8]*> [#uses=1]
@.LC12 = internal global [17 x i8] c"spec_getc: %d = \00" ; <[17 x i8]*> [#uses=1]
declare i32 @fprintf(%FILE*, i8*, ...)
declare void @exit(i32)
declare i32 @remove(i8*)
declare i32 @fputc(i32, %FILE*)
declare i32 @fwrite(i8*, i32, i32, %FILE*)
declare void @perror(i8*)
define i32 @spec_getc(i32 %fd) {
%reg109 = load i32, i32* @dbglvl ; <i32> [#uses=1]
%cond266 = icmp sle i32 %reg109, 4 ; <i1> [#uses=1]
; CHECK: br label %bb3
br i1 %cond266, label %bb3, label %bb2
bb2: ; preds = %0
%cast273 = getelementptr [17 x i8], [17 x i8]* @.LC12, i64 0, i64 0 ; <i8*> [#uses=0]
br label %bb3
bb3: ; preds = %bb2, %0
%cond267 = icmp sle i32 %fd, 3 ; <i1> [#uses=0]
br label %bb5
bb4: ; No predecessors!
%reg111 = getelementptr [20 x %FILE], [20 x %FILE]* @__iob, i64 0, i64 1, i32 3 ; <i8*> [#uses=1]
%cast274 = getelementptr [34 x i8], [34 x i8]* @.LC9, i64 0, i64 0 ; <i8*> [#uses=0]
%cast282 = bitcast i8* %reg111 to %FILE* ; <%FILE*> [#uses=0]
call void @exit( i32 1 )
br label %UnifiedExitNode
bb5: ; preds = %bb3
%reg107-idxcast1 = sext i32 %fd to i64 ; <i64> [#uses=2]
%reg107-idxcast2 = sext i32 %fd to i64 ; <i64> [#uses=1]
%reg1311 = getelementptr [3 x %spec_fd_t], [3 x %spec_fd_t]* @spec_fd, i64 0, i64 %reg107-idxcast2 ; <%spec_fd_t*> [#uses=1]
%idx1 = getelementptr [3 x %spec_fd_t], [3 x %spec_fd_t]* @spec_fd, i64 0, i64 %reg107-idxcast1, i32 2 ; <i32*> [#uses=1]
%reg1321 = load i32, i32* %idx1 ; <i32> [#uses=3]
%idx2 = getelementptr %spec_fd_t, %spec_fd_t* %reg1311, i64 0, i32 1 ; <i32*> [#uses=1]
%reg1331 = load i32, i32* %idx2 ; <i32> [#uses=1]
%cond270 = icmp slt i32 %reg1321, %reg1331 ; <i1> [#uses=1]
br i1 %cond270, label %bb9, label %bb6
bb6: ; preds = %bb5
%reg134 = load i32, i32* @dbglvl ; <i32> [#uses=1]
%cond271 = icmp sle i32 %reg134, 4 ; <i1> [#uses=1]
; CHECK: br label %bb8
br i1 %cond271, label %bb8, label %bb7
bb7: ; preds = %bb6
%cast277 = getelementptr [4 x i8], [4 x i8]* @.LC10, i64 0, i64 0 ; <i8*> [#uses=0]
br label %bb8
bb8: ; preds = %bb7, %bb6
br label %UnifiedExitNode
bb9: ; preds = %bb5
%reg107-idxcast3 = sext i32 %fd to i64 ; <i64> [#uses=1]
%idx3 = getelementptr [3 x %spec_fd_t], [3 x %spec_fd_t]* @spec_fd, i64 0, i64 %reg107-idxcast3, i32 3 ; <i8**> [#uses=1]
%reg1601 = load i8*, i8** %idx3 ; <i8*> [#uses=1]
%reg132-idxcast1 = sext i32 %reg1321 to i64 ; <i64> [#uses=1]
%idx4 = getelementptr i8, i8* %reg1601, i64 %reg132-idxcast1 ; <i8*> [#uses=1]
%reg1621 = load i8, i8* %idx4 ; <i8> [#uses=2]
%cast108 = zext i8 %reg1621 to i64 ; <i64> [#uses=0]
%reg157 = add i32 %reg1321, 1 ; <i32> [#uses=1]
%idx5 = getelementptr [3 x %spec_fd_t], [3 x %spec_fd_t]* @spec_fd, i64 0, i64 %reg107-idxcast1, i32 2 ; <i32*> [#uses=1]
store i32 %reg157, i32* %idx5
%reg163 = load i32, i32* @dbglvl ; <i32> [#uses=1]
%cond272 = icmp sle i32 %reg163, 4 ; <i1> [#uses=1]
; CHECK: br label %bb11
br i1 %cond272, label %bb11, label %bb10
bb10: ; preds = %bb9
%cast279 = getelementptr [4 x i8], [4 x i8]* @.LC11, i64 0, i64 0 ; <i8*> [#uses=0]
br label %bb11
bb11: ; preds = %bb10, %bb9
%cast291 = zext i8 %reg1621 to i32 ; <i32> [#uses=1]
br label %UnifiedExitNode
UnifiedExitNode: ; preds = %bb11, %bb8, %bb4
%UnifiedRetVal = phi i32 [ 42, %bb4 ], [ -1, %bb8 ], [ %cast291, %bb11 ] ; <i32> [#uses=1]
ret i32 %UnifiedRetVal
}
declare i32 @puts(i8*)
declare i32 @printf(i8*, ...)

View File

@ -0,0 +1,8 @@
; RUN: opt -adce -S < %s | not grep call
declare i32 @strlen(i8*) readonly nounwind
; The call below targets a function declared readonly and nounwind, and its
; result is unused. The RUN line pipes the output through "not grep call", so
; the test passes only if no call remains after -adce.
define void @test() {
call i32 @strlen( i8* null ) ; <i32>:1 [#uses=0]
ret void
}

View File

@ -0,0 +1,19 @@
; RUN: opt < %s -adce -S | grep null
declare i32 @strlen(i8*) readnone
define i32 @test() personality i32 (...)* @__gxx_personality_v0 {
; invoke of pure function should not be deleted!
invoke i32 @strlen( i8* null ) readnone
to label %Cont unwind label %Other ; <i32>:1 [#uses=0]
Cont: ; preds = %0
ret i32 0
Other: ; preds = %0
%exn = landingpad {i8*, i32}
cleanup
ret i32 1
}
declare i32 @__gxx_personality_v0(...)

View File

@ -0,0 +1,100 @@
; RUN: opt -adce -S < %s | FileCheck %s
; Test that debug info intrinsics in dead scopes get eliminated by -adce.
; Generated with 'clang -g -S -emit-llvm | opt -mem2reg -inline' at r262899
; (before -adce was augmented) and then hand-reduced. This was the input:
;
;;void sink(void);
;;
;;void variable_in_unused_subscope(void) {
;; { int i = 0; }
;; sink();
;;}
;;
;;void variable_in_parent_scope(void) {
;; int i = 0;
;; { sink(); }
;;}
;;
;;static int empty_function_with_unused_variable(void) {
;; { int i = 0; }
;; return 0;
;;}
;;
;;void calls_empty_function_with_unused_variable_in_unused_subscope(void) {
;; { empty_function_with_unused_variable(); }
;; sink();
;;}
declare void @llvm.dbg.value(metadata, metadata, metadata)
declare void @sink()
; CHECK-LABEL: define void @variable_in_unused_subscope(
; The dbg.value below describes variable !15, which lives in lexical block !16;
; the call to @sink is located directly in the function scope !4. The FileCheck
; lines in the body expect only the @sink call and the return to remain.
define void @variable_in_unused_subscope() !dbg !4 {
; CHECK-NEXT: entry:
; CHECK-NEXT: call void @sink
; CHECK-NEXT: ret void
entry:
call void @llvm.dbg.value(metadata i32 0, metadata !15, metadata !17), !dbg !18
call void @sink(), !dbg !19
ret void, !dbg !20
}
; CHECK-LABEL: define void @variable_in_parent_scope(
; Here the variable (!21) is scoped to the function itself (!7), while the
; @sink call sits in the nested lexical block !24. The FileCheck lines in the
; body expect the dbg.value to be kept.
define void @variable_in_parent_scope() !dbg !7 {
; CHECK-NEXT: entry:
; CHECK-NEXT: call void @llvm.dbg.value
; CHECK-NEXT: call void @sink
; CHECK-NEXT: ret void
entry:
call void @llvm.dbg.value(metadata i32 0, metadata !21, metadata !17), !dbg !22
call void @sink(), !dbg !23
ret void, !dbg !25
}
; CHECK-LABEL: define void @calls_empty_function_with_unused_variable_in_unused_subscope(
; The dbg.value describes variable !26 from lexical block !27 of subprogram !10
; (its location !28 is inlinedAt !29); the @sink call is at function scope !8.
; The FileCheck lines in the body expect the dbg.value to be removed.
define void @calls_empty_function_with_unused_variable_in_unused_subscope() !dbg !8 {
; CHECK-NEXT: entry:
; CHECK-NEXT: call void @sink
; CHECK-NEXT: ret void
entry:
call void @llvm.dbg.value(metadata i32 0, metadata !26, metadata !17), !dbg !28
call void @sink(), !dbg !31
ret void, !dbg !32
}
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!14}
!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
!1 = !DIFile(filename: "t.c", directory: "/path/to/test/Transforms/ADCE")
!2 = !{}
!4 = distinct !DISubprogram(name: "variable_in_unused_subscope", scope: !1, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2)
!5 = !DISubroutineType(types: !6)
!6 = !{null}
!7 = distinct !DISubprogram(name: "variable_in_parent_scope", scope: !1, file: !1, line: 8, type: !5, isLocal: false, isDefinition: true, scopeLine: 8, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2)
!8 = distinct !DISubprogram(name: "calls_empty_function_with_unused_variable_in_unused_subscope", scope: !1, file: !1, line: 18, type: !5, isLocal: false, isDefinition: true, scopeLine: 18, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2)
!10 = distinct !DISubprogram(name: "empty_function_with_unused_variable", scope: !1, file: !1, line: 13, type: !11, isLocal: true, isDefinition: true, scopeLine: 13, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2)
!11 = !DISubroutineType(types: !12)
!12 = !{!13}
!13 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!14 = !{i32 2, !"Debug Info Version", i32 3}
!15 = !DILocalVariable(name: "i", scope: !16, file: !1, line: 4, type: !13)
!16 = distinct !DILexicalBlock(scope: !4, file: !1, line: 4, column: 3)
!17 = !DIExpression()
!18 = !DILocation(line: 4, column: 9, scope: !16)
!19 = !DILocation(line: 5, column: 3, scope: !4)
!20 = !DILocation(line: 6, column: 1, scope: !4)
!21 = !DILocalVariable(name: "i", scope: !7, file: !1, line: 9, type: !13)
!22 = !DILocation(line: 9, column: 7, scope: !7)
!23 = !DILocation(line: 10, column: 5, scope: !24)
!24 = distinct !DILexicalBlock(scope: !7, file: !1, line: 10, column: 3)
!25 = !DILocation(line: 11, column: 1, scope: !7)
!26 = !DILocalVariable(name: "i", scope: !27, file: !1, line: 14, type: !13)
!27 = distinct !DILexicalBlock(scope: !10, file: !1, line: 14, column: 3)
!28 = !DILocation(line: 14, column: 9, scope: !27, inlinedAt: !29)
!29 = distinct !DILocation(line: 19, column: 5, scope: !30)
!30 = distinct !DILexicalBlock(scope: !8, file: !1, line: 19, column: 3)
!31 = !DILocation(line: 20, column: 3, scope: !8)
!32 = !DILocation(line: 21, column: 1, scope: !8)

View File

@ -0,0 +1,19 @@
; RUN: opt < %s -adce -S | FileCheck %s
; RUN: opt < %s -passes=adce -S | FileCheck %s
; Verify that a call to instrument a constant is deleted.
@__profc_foo = private global [1 x i64] zeroinitializer, section "__llvm_prf_cnts", align 8
@__profd_foo = private global { i64, i64, i64*, i8*, i8*, i32, [1 x i16] } { i64 6699318081062747564, i64 0, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_foo, i32 0, i32 0), i8* bitcast (i32 ()* @foo to i8*), i8* null, i32 1, [1 x i16] [i16 1] }, section "__llvm_prf_data", align 8
; The first argument of the instrumentation call is a constant expression
; (ptrtoint of @bar) rather than a value computed at run time. The FileCheck
; directive inside the body expects that call to be absent from the output.
define i32 @foo() {
; CHECK-NOT: call void @__llvm_profile_instrument_target
entry:
tail call void @__llvm_profile_instrument_target(i64 ptrtoint (i32 (i32)* @bar to i64), i8* bitcast ({ i64, i64, i64*, i8*, i8*, i32, [1 x i16] }* @__profd_foo to i8*), i32 0)
%call = tail call i32 @bar(i32 21)
ret i32 %call
}
declare i32 @bar(i32)
declare void @__llvm_profile_instrument_target(i64, i8*, i32)

View File

@ -0,0 +1,39 @@
; RUN: opt < %s -gvn -simplifycfg -adce | llvm-dis
; RUN: opt < %s -gvn -simplifycfg -adce -verify-dom-info | llvm-dis
; This test makes sure that the DominatorTree properly handles
; deletion of edges that go to forward-unreachable regions.
; In this case, %land.end is already forward unreachable when
; the DT gets informed about the deletion of %entry -> %land.end.
@a = common global i32 0, align 4
; Short-circuit control flow merged by two phi nodes (%2 and %3). The last
; value of the computed chain, %land.ext5, has no users, and the function
; returns the constant 0.
define i32 @main() {
entry:
%retval = alloca i32, align 4
store i32 0, i32* %retval, align 4
%0 = load i32, i32* @a, align 4
%cmp = icmp ne i32 %0, 1
br i1 %cmp, label %land.rhs, label %land.end4
land.rhs: ; preds = %entry
%1 = load i32, i32* @a, align 4
%tobool = icmp ne i32 %1, 0
br i1 %tobool, label %land.rhs1, label %land.end
land.rhs1: ; preds = %land.rhs
br label %land.end
land.end: ; preds = %land.rhs1, %land.rhs
%2 = phi i1 [ false, %land.rhs ], [ true, %land.rhs1 ]
%land.ext = zext i1 %2 to i32
%conv = trunc i32 %land.ext to i16
%conv2 = sext i16 %conv to i32
%tobool3 = icmp ne i32 %conv2, 0
br label %land.end4
land.end4: ; preds = %land.end, %entry
%3 = phi i1 [ false, %entry ], [ %tobool3, %land.end ]
%land.ext5 = zext i1 %3 to i32
ret i32 0
}

View File

@ -0,0 +1,5 @@
; RUN: opt < %s -adce -disable-output
; The whole body is a single unreachable terminator. The run line discards the
; output, so the test only requires that opt -adce completes on this input.
define void @test() {
unreachable
}

View File

@ -0,0 +1,18 @@
; RUN: opt < %s -adce -simplifycfg | llvm-dis
; RUN: opt < %s -passes=adce | llvm-dis
; %BB2 has no predecessors but still supplies an incoming value (%B) to the phi
; in %BB3; the only path from the entry block %BB1 goes through %BB4.
define i32 @Test(i32 %A, i32 %B) {
BB1:
br label %BB4
BB2: ; No predecessors!
br label %BB3
BB3: ; preds = %BB4, %BB2
%ret = phi i32 [ %X, %BB4 ], [ %B, %BB2 ] ; <i32> [#uses=1]
ret i32 %ret
BB4: ; preds = %BB1
%X = phi i32 [ %A, %BB1 ] ; <i32> [#uses=1]
br label %BB3
}

View File

@ -0,0 +1,62 @@
; RUN: opt < %s -add-discriminators -S | FileCheck %s
; RUN: opt < %s -passes=add-discriminators -S | FileCheck %s
; Basic DWARF discriminator test. All the instructions in block
; 'if.then' should have a different discriminator value than
; the conditional branch at the end of block 'entry'.
;
; Original code:
;
; void foo(int i) {
; int x;
; if (i < 10) x = i;
; }
; Every instruction in %if.then carries !dbg !10, the same location as the
; conditional branch that ends %entry. The FileCheck lines expect all three
; %if.then instructions to share one rewritten location.
define void @foo(i32 %i) #0 !dbg !4 {
entry:
%i.addr = alloca i32, align 4
%x = alloca i32, align 4
store i32 %i, i32* %i.addr, align 4
%0 = load i32, i32* %i.addr, align 4, !dbg !10
%cmp = icmp slt i32 %0, 10, !dbg !10
br i1 %cmp, label %if.then, label %if.end, !dbg !10
if.then: ; preds = %entry
%1 = load i32, i32* %i.addr, align 4, !dbg !10
; CHECK: %1 = load i32, i32* %i.addr, align 4, !dbg ![[THEN:[0-9]+]]
store i32 %1, i32* %x, align 4, !dbg !10
; CHECK: store i32 %1, i32* %x, align 4, !dbg ![[THEN]]
br label %if.end, !dbg !10
; CHECK: br label %if.end, !dbg ![[THEN]]
if.end: ; preds = %if.then, %entry
ret void, !dbg !12
; CHECK: ret void, !dbg ![[END:[0-9]+]]
}
attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!7, !8}
!llvm.ident = !{!9}
!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5 ", isOptimized: false, emissionKind: NoDebug, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
!1 = !DIFile(filename: "basic.c", directory: ".")
!2 = !{}
!4 = distinct !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 1, file: !1, scope: !5, type: !6, retainedNodes: !2)
!5 = !DIFile(filename: "basic.c", directory: ".")
!6 = !DISubroutineType(types: !2)
!7 = !{i32 2, !"Dwarf Version", i32 4}
!8 = !{i32 1, !"Debug Info Version", i32 3}
!9 = !{!"clang version 3.5 "}
!10 = !DILocation(line: 3, scope: !11)
!11 = distinct !DILexicalBlock(line: 3, column: 0, file: !1, scope: !4)
!12 = !DILocation(line: 4, scope: !4)
; CHECK: ![[FOO:[0-9]+]] = distinct !DISubprogram(name: "foo"
; CHECK: ![[BLOCK:[0-9]+]] = distinct !DILexicalBlock(scope: ![[FOO]],{{.*}} line: 3)
; CHECK: ![[THEN]] = !DILocation(line: 3, scope: ![[BLOCKFILE:[0-9]+]])
; CHECK: ![[BLOCKFILE]] = !DILexicalBlockFile(scope: ![[BLOCK]],{{.*}} discriminator: 2)
; CHECK: ![[END]] = !DILocation(line: 4, scope: ![[FOO]])

View File

@ -0,0 +1,50 @@
; RUN: opt < %s -add-discriminators -S | FileCheck %s
; RUN: opt < %s -passes=add-discriminators -S | FileCheck %s
; Discriminator support for calls that are defined in one line:
; #1 int foo(int, int);
; #2 int bar();
; #3 int baz() {
; #4 return foo(bar(),
; #5 bar());
; #6 }
; Function Attrs: uwtable
; The two @_Z3barv calls are on source lines 4 and 5 (!11, !12); the @_Z3fooii
; call is on line 4 again (!13). The FileCheck lines at the end of the file
; expect its location to be rescoped into a discriminator-2 block.
define i32 @_Z3bazv() #0 !dbg !4 {
%1 = call i32 @_Z3barv(), !dbg !11
; CHECK: %1 = call i32 @_Z3barv(), !dbg ![[CALL0:[0-9]+]]
%2 = call i32 @_Z3barv(), !dbg !12
; CHECK: %2 = call i32 @_Z3barv(), !dbg ![[CALL1:[0-9]+]]
%3 = call i32 @_Z3fooii(i32 %1, i32 %2), !dbg !13
; CHECK: %3 = call i32 @_Z3fooii(i32 %1, i32 %2), !dbg ![[CALL2:[0-9]+]]
ret i32 %3, !dbg !14
}
declare i32 @_Z3fooii(i32, i32) #1
declare i32 @_Z3barv() #1
attributes #0 = { uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!8, !9}
!llvm.ident = !{!10}
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.9.0 (trunk 266269)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
!1 = !DIFile(filename: "test.cc", directory: "")
!2 = !{}
!4 = distinct !DISubprogram(name: "baz", linkageName: "_Z3bazv", scope: !1, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !2)
!5 = !DISubroutineType(types: !6)
!6 = !{!7}
!7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!8 = !{i32 2, !"Dwarf Version", i32 4}
!9 = !{i32 2, !"Debug Info Version", i32 3}
!10 = !{!"clang version 3.9.0 (trunk 266269)"}
!11 = !DILocation(line: 4, column: 14, scope: !4)
!12 = !DILocation(line: 5, column: 14, scope: !4)
!13 = !DILocation(line: 4, column: 10, scope: !4)
!14 = !DILocation(line: 4, column: 3, scope: !4)
; CHECK: ![[CALL2]] = !DILocation(line: 4, column: 10, scope: ![[CALL2BLOCK:[0-9]+]])
; CHECK: ![[CALL2BLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 2)

View File

@ -0,0 +1,54 @@
; RUN: opt < %s -add-discriminators -S | FileCheck %s
; RUN: opt < %s -passes=add-discriminators -S | FileCheck %s
; Discriminator support for calls that are defined in one line:
; #1 void bar();
; #2
; #3 void foo() {
; #4 bar();bar()/*discriminator 2*/;bar()/*discriminator 4*/;
; #5 }
; Function Attrs: uwtable
; Three @_Z3barv calls, all on source line 4 (!10, !11, !12). The lifetime
; intrinsic calls in between reuse !11, the location of the second call. The
; FileCheck lines at the end of the file expect discriminators 2 and 4 for the
; second and third @_Z3barv calls.
define void @_Z3foov() #0 !dbg !4 {
call void @_Z3barv(), !dbg !10
; CHECK: call void @_Z3barv(), !dbg ![[CALL0:[0-9]+]]
%a = alloca [100 x i8], align 16
%b = bitcast [100 x i8]* %a to i8*
call void @llvm.lifetime.start.p0i8(i64 100, i8* %b), !dbg !11
call void @llvm.lifetime.end.p0i8(i64 100, i8* %b), !dbg !11
call void @_Z3barv(), !dbg !11
; CHECK: call void @_Z3barv(), !dbg ![[CALL1:[0-9]+]]
call void @_Z3barv(), !dbg !12
; CHECK: call void @_Z3barv(), !dbg ![[CALL2:[0-9]+]]
ret void, !dbg !13
}
declare void @_Z3barv() #1
declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) nounwind argmemonly
declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) nounwind argmemonly
attributes #0 = { uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!7, !8}
!llvm.ident = !{!9}
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 250915) (llvm/trunk 251830)", isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug, enums: !2)
!1 = !DIFile(filename: "c.cc", directory: "/tmp")
!2 = !{}
!4 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !1, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2)
!5 = !DISubroutineType(types: !6)
!6 = !{null}
!7 = !{i32 2, !"Dwarf Version", i32 4}
!8 = !{i32 2, !"Debug Info Version", i32 3}
!9 = !{!"clang version 3.8.0 (trunk 250915) (llvm/trunk 251830)"}
!10 = !DILocation(line: 4, column: 3, scope: !4)
!11 = !DILocation(line: 4, column: 9, scope: !4)
!12 = !DILocation(line: 4, column: 15, scope: !4)
!13 = !DILocation(line: 5, column: 1, scope: !4)
; CHECK: ![[CALL1]] = !DILocation(line: 4, column: 9, scope: ![[CALL1BLOCK:[0-9]+]])
; CHECK: ![[CALL1BLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 2)
; CHECK: ![[CALL2]] = !DILocation(line: 4, column: 15, scope: ![[CALL2BLOCK:[0-9]+]])
; CHECK: ![[CALL2BLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 4)

View File

@ -0,0 +1,33 @@
; RUN: opt -S -add-discriminators < %s | FileCheck %s
; RUN: opt -S -passes=add-discriminators < %s | FileCheck %s
declare void @llvm.dbg.declare(metadata, metadata, metadata)
; This checks whether the add-discriminators pass produces valid metadata on
; llvm.dbg.declare instructions.
;
; CHECK-LABEL: @test_valid_metadata
; Two dbg.declare calls whose locations are both on line 0 but in different
; subprograms (!3 and !10); the first one is additionally inlinedAt !7.
; NOTE(review): the only FileCheck directive is the label match preceding this
; function, so the test presumably relies on opt rejecting invalid output.
define void @test_valid_metadata() {
%a = alloca i8
call void @llvm.dbg.declare(metadata i8* %a, metadata !2, metadata !5), !dbg !6
%b = alloca i8
call void @llvm.dbg.declare(metadata i8* %b, metadata !9, metadata !5), !dbg !11
ret void
}
!llvm.module.flags = !{!0, !1}
!llvm.dbg.cu = !{!12}
!0 = !{i32 2, !"Dwarf Version", i32 4}
!1 = !{i32 2, !"Debug Info Version", i32 3}
!2 = !DILocalVariable(scope: !3)
!3 = distinct !DISubprogram(scope: null, file: !4, isLocal: false, isDefinition: true, isOptimized: false, unit: !12)
!4 = !DIFile(filename: "a.cpp", directory: "/tmp")
!5 = !DIExpression()
!6 = !DILocation(line: 0, scope: !3, inlinedAt: !7)
!7 = distinct !DILocation(line: 0, scope: !8)
!8 = distinct !DISubprogram(linkageName: "test_valid_metadata", scope: null, isLocal: false, isDefinition: true, isOptimized: false, unit: !12)
!9 = !DILocalVariable(scope: !10)
!10 = distinct !DISubprogram(scope: null, file: !4, isLocal: false, isDefinition: true, isOptimized: false, unit: !12)
!11 = !DILocation(line: 0, scope: !10)
!12 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5 ", isOptimized: false, emissionKind: FullDebug, file: !4)

View File

@ -0,0 +1,72 @@
; RUN: opt < %s -add-discriminators -S | FileCheck %s
; RUN: opt < %s -passes=add-discriminators -S | FileCheck %s
; Discriminator support for diamond-shaped CFG:
; #1 void bar(int);
; #2
; #3 void foo(int i) {
; #4 if (i > 10)
; #5 bar(5); else bar(3);
; #6 }
; bar(5): discriminator 0
; bar(3): discriminator 2
; Function Attrs: uwtable
; Diamond: block %4 calls bar(5) at !18 and block %5 calls bar(3) at !19, both
; on source line 5. The branch that ends block %5 has no !dbg attachment.
define void @_Z3fooi(i32 %i) #0 !dbg !4 {
%1 = alloca i32, align 4
store i32 %i, i32* %1, align 4
call void @llvm.dbg.declare(metadata i32* %1, metadata !11, metadata !12), !dbg !13
%2 = load i32, i32* %1, align 4, !dbg !14
%3 = icmp sgt i32 %2, 10, !dbg !16
br i1 %3, label %4, label %5, !dbg !17
; <label>:4 ; preds = %0
call void @_Z3bari(i32 5), !dbg !18
br label %6, !dbg !18
; <label>:5 ; preds = %0
call void @_Z3bari(i32 3), !dbg !19
; CHECK: call void @_Z3bari(i32 3), !dbg ![[ELSE:[0-9]+]]
br label %6
; <label>:6 ; preds = %5, %4
ret void, !dbg !20
}
; Function Attrs: nounwind readnone
declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
declare void @_Z3bari(i32) #2
attributes #0 = { uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
attributes #2 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!8, !9}
!llvm.ident = !{!10}
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 253273)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
!1 = !DIFile(filename: "a.cc", directory: "/tmp")
!2 = !{}
!4 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fooi", scope: !1, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2)
!5 = !DISubroutineType(types: !6)
!6 = !{null, !7}
!7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!8 = !{i32 2, !"Dwarf Version", i32 4}
!9 = !{i32 2, !"Debug Info Version", i32 3}
!10 = !{!"clang version 3.8.0 (trunk 253273)"}
!11 = !DILocalVariable(name: "i", arg: 1, scope: !4, file: !1, line: 3, type: !7)
!12 = !DIExpression()
!13 = !DILocation(line: 3, column: 14, scope: !4)
!14 = !DILocation(line: 4, column: 7, scope: !15)
!15 = distinct !DILexicalBlock(scope: !4, file: !1, line: 4, column: 7)
!16 = !DILocation(line: 4, column: 9, scope: !15)
!17 = !DILocation(line: 4, column: 7, scope: !4)
!18 = !DILocation(line: 5, column: 5, scope: !15)
!19 = !DILocation(line: 5, column: 18, scope: !15)
!20 = !DILocation(line: 6, column: 1, scope: !4)
; CHECK: ![[ELSE]] = !DILocation(line: 5, column: 18, scope: ![[ELSEBLOCK:[0-9]+]])
; CHECK: ![[ELSEBLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 2)

View File

@ -0,0 +1,83 @@
; RUN: opt < %s -add-discriminators -S | FileCheck %s
; RUN: opt < %s -passes=add-discriminators -S | FileCheck %s
; Test that the only instructions that receive a new discriminator in
; the block 'if.then' are those that share the same line number as
; the branch in 'entry'.
;
; Original code:
;
; void foo(int i) {
; int x, y;
; if (i < 10) { x = i;
; y = -i;
; }
; }
; In %if.then the first load/store pair is on line 3 (!12), the same line as
; the conditional branch in %entry (!10). The remaining instructions are on
; lines 4 and 5 (!14, !15), and the FileCheck lines expect those to stay in the
; plain lexical block rather than a discriminator block.
define void @foo(i32 %i) #0 !dbg !4 {
entry:
%i.addr = alloca i32, align 4
%x = alloca i32, align 4
%y = alloca i32, align 4
store i32 %i, i32* %i.addr, align 4
%0 = load i32, i32* %i.addr, align 4, !dbg !10
%cmp = icmp slt i32 %0, 10, !dbg !10
br i1 %cmp, label %if.then, label %if.end, !dbg !10
if.then: ; preds = %entry
%1 = load i32, i32* %i.addr, align 4, !dbg !12
store i32 %1, i32* %x, align 4, !dbg !12
%2 = load i32, i32* %i.addr, align 4, !dbg !14
; CHECK: %2 = load i32, i32* %i.addr, align 4, !dbg ![[THEN:[0-9]+]]
%sub = sub nsw i32 0, %2, !dbg !14
; CHECK: %sub = sub nsw i32 0, %2, !dbg ![[THEN]]
store i32 %sub, i32* %y, align 4, !dbg !14
; CHECK: store i32 %sub, i32* %y, align 4, !dbg ![[THEN]]
br label %if.end, !dbg !15
; CHECK: br label %if.end, !dbg ![[BR:[0-9]+]]
if.end: ; preds = %if.then, %entry
ret void, !dbg !16
; CHECK: ret void, !dbg ![[END:[0-9]+]]
}
attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!7, !8}
!llvm.ident = !{!9}
!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5 (trunk 199750) (llvm/trunk 199751)", isOptimized: false, emissionKind: NoDebug, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
!1 = !DIFile(filename: "first-only.c", directory: ".")
!2 = !{}
!4 = distinct !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 1, file: !1, scope: !5, type: !6, retainedNodes: !2)
!5 = !DIFile(filename: "first-only.c", directory: ".")
!6 = !DISubroutineType(types: !{null})
!7 = !{i32 2, !"Dwarf Version", i32 4}
!8 = !{i32 1, !"Debug Info Version", i32 3}
!9 = !{!"clang version 3.5 (trunk 199750) (llvm/trunk 199751)"}
!10 = !DILocation(line: 3, scope: !11)
!11 = distinct !DILexicalBlock(line: 3, column: 0, file: !1, scope: !4)
; CHECK: ![[FOO:[0-9]+]] = distinct !DISubprogram(name: "foo"
; CHECK: ![[BLOCK1:[0-9]+]] = distinct !DILexicalBlock(scope: ![[FOO]],{{.*}} line: 3)
!12 = !DILocation(line: 3, scope: !13)
!13 = distinct !DILexicalBlock(line: 3, column: 0, file: !1, scope: !11)
; CHECK: !DILexicalBlockFile(scope: ![[BLOCK2:[0-9]+]],{{.*}} discriminator: 2)
!14 = !DILocation(line: 4, scope: !13)
; CHECK: ![[BLOCK2]] = distinct !DILexicalBlock(scope: ![[BLOCK1]],{{.*}} line: 3)
!15 = !DILocation(line: 5, scope: !13)
; CHECK: ![[THEN]] = !DILocation(line: 4, scope: ![[BLOCK2]])
!16 = !DILocation(line: 6, scope: !4)
; CHECK: ![[BR]] = !DILocation(line: 5, scope: ![[BLOCK2]])
; CHECK: ![[END]] = !DILocation(line: 6, scope: ![[FOO]])

View File

@ -0,0 +1,83 @@
; RUN: opt < %s -add-discriminators -S | FileCheck %s
;
; Generated at -O3 from:
; g();f(){for(;;){g();}}g(){__builtin___memset_chk(0,0,0,__builtin_object_size(1,0));}
; The fact that everything is on one line is significant!
;
; This test ensures that inline info isn't dropped even if the call site and the
; inlined function are defined on the same line.
source_filename = "t.c"
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
target triple = "arm64-apple-ios"
; Function Attrs: noreturn nounwind ssp
; Infinite loop: %for.cond branches back to itself. Both calls carry locations
; from @g's subprogram (!12) with inlinedAt !13. The FileCheck lines expect the
; back-edge branch to keep a location whose scope is !14.
define i32 @f() local_unnamed_addr #0 !dbg !7 {
entry:
%0 = tail call i64 @llvm.objectsize.i64.p0i8(i8* inttoptr (i64 1 to i8*), i1 false) #2, !dbg !11
br label %for.cond, !dbg !18
for.cond: ; preds = %for.cond, %entry
; CHECK: %call.i
%call.i = tail call i8* @__memset_chk(i8* null, i32 0, i64 0, i64 %0) #2, !dbg !19
; CHECK: br label %for.cond, !dbg ![[BR:[0-9]+]]
br label %for.cond, !dbg !20, !llvm.loop !21
}
; Function Attrs: nounwind ssp
; Out-of-line definition of g: it calls the same two callees as @f, but with
; plain (not inlinedAt) locations !22 and !23, and returns undef.
define i32 @g() local_unnamed_addr #1 !dbg !12 {
entry:
%0 = tail call i64 @llvm.objectsize.i64.p0i8(i8* inttoptr (i64 1 to i8*), i1 false), !dbg !22
%call = tail call i8* @__memset_chk(i8* null, i32 0, i64 0, i64 %0) #2, !dbg !23
ret i32 undef, !dbg !24
}
; Function Attrs: nounwind
declare i8* @__memset_chk(i8*, i32, i64, i64) local_unnamed_addr #2
; Function Attrs: nounwind readnone
declare i64 @llvm.objectsize.i64.p0i8(i8*, i1) #3
attributes #0 = { noreturn nounwind ssp }
attributes #1 = { nounwind ssp }
attributes #2 = { nounwind }
attributes #3 = { nounwind readnone }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4, !5}
!llvm.ident = !{!6}
!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "LLVM version 4.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
!1 = !DIFile(filename: "t.c", directory: "/")
!2 = !{}
!3 = !{i32 2, !"Dwarf Version", i32 4}
!4 = !{i32 2, !"Debug Info Version", i32 3}
!5 = !{i32 1, !"PIC Level", i32 2}
!6 = !{!"LLVM version 4.0.0"}
; CHECK: ![[F:.*]] = distinct !DISubprogram(name: "f",
!7 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 1, type: !8, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: true, unit: !0, retainedNodes: !2)
!8 = !DISubroutineType(types: !9)
!9 = !{!10}
!10 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!11 = !DILocation(line: 1, column: 56, scope: !12, inlinedAt: !13)
!12 = distinct !DISubprogram(name: "g", scope: !1, file: !1, line: 1, type: !8, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: true, unit: !0, retainedNodes: !2)
!13 = distinct !DILocation(line: 1, column: 17, scope: !14)
; CHECK: ![[BF:.*]] = !DILexicalBlockFile(scope: ![[LB1:[0-9]+]],
; CHECK-SAME: discriminator: 2)
!14 = !DILexicalBlockFile(scope: !15, file: !1, discriminator: 2)
; CHECK: ![[LB1]] = distinct !DILexicalBlock(scope: ![[LB2:[0-9]+]],
; CHECK-SAME: line: 1, column: 16)
!15 = distinct !DILexicalBlock(scope: !16, file: !1, line: 1, column: 16)
; CHECK: ![[LB2]] = distinct !DILexicalBlock(scope: ![[LB3:[0-9]+]],
; CHECK-SAME: line: 1, column: 9)
!16 = distinct !DILexicalBlock(scope: !17, file: !1, line: 1, column: 9)
; CHECK: ![[LB3]] = distinct !DILexicalBlock(scope: ![[F]],
; CHECK-SAME: line: 1, column: 9)
!17 = distinct !DILexicalBlock(scope: !7, file: !1, line: 1, column: 9)
!18 = !DILocation(line: 1, column: 9, scope: !7)
!19 = !DILocation(line: 1, column: 27, scope: !12, inlinedAt: !13)
; CHECK: ![[BR]] = !DILocation(line: 1, column: 9, scope: !14)
!20 = !DILocation(line: 1, column: 9, scope: !14)
!21 = distinct !{!21, !18}
!22 = !DILocation(line: 1, column: 56, scope: !12)
!23 = !DILocation(line: 1, column: 27, scope: !12)
!24 = !DILocation(line: 1, column: 84, scope: !12)

View File

@ -0,0 +1,134 @@
; RUN: opt < %s -add-discriminators -S | FileCheck %s
; ModuleID = 'invoke.bc'
source_filename = "invoke.cpp"
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.14.0"
; Function Attrs: ssp uwtable
; Two nounwind calls and one invoke that all sit on source line 7 (!11, !13,
; !14) in lexical block !12. The FileCheck lines after the attribute groups
; expect the first call to stay in that lexical block, and the second call and
; the invoke to get discriminators 2 and 4 respectively.
define void @_Z3foov() #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg !8 {
entry:
%exn.slot = alloca i8*
%ehselector.slot = alloca i32
; CHECK: call void @_Z12bar_noexceptv({{.*}} !dbg ![[CALL1:[0-9]+]]
call void @_Z12bar_noexceptv() #4, !dbg !11
; CHECK: call void @_Z12bar_noexceptv({{.*}} !dbg ![[CALL2:[0-9]+]]
call void @_Z12bar_noexceptv() #4, !dbg !13
invoke void @_Z3barv()
; CHECK: unwind label {{.*}} !dbg ![[INVOKE:[0-9]+]]
to label %invoke.cont unwind label %lpad, !dbg !14
invoke.cont: ; preds = %entry
br label %try.cont, !dbg !15
lpad: ; preds = %entry
%0 = landingpad { i8*, i32 }
catch i8* null, !dbg !16
%1 = extractvalue { i8*, i32 } %0, 0, !dbg !16
store i8* %1, i8** %exn.slot, align 8, !dbg !16
%2 = extractvalue { i8*, i32 } %0, 1, !dbg !16
store i32 %2, i32* %ehselector.slot, align 4, !dbg !16
br label %catch, !dbg !16
catch: ; preds = %lpad
%exn = load i8*, i8** %exn.slot, align 8, !dbg !15
%3 = call i8* @__cxa_begin_catch(i8* %exn) #4, !dbg !15
invoke void @__cxa_rethrow() #5
to label %unreachable unwind label %lpad1, !dbg !17
lpad1: ; preds = %catch
%4 = landingpad { i8*, i32 }
cleanup, !dbg !19
%5 = extractvalue { i8*, i32 } %4, 0, !dbg !19
store i8* %5, i8** %exn.slot, align 8, !dbg !19
%6 = extractvalue { i8*, i32 } %4, 1, !dbg !19
store i32 %6, i32* %ehselector.slot, align 4, !dbg !19
invoke void @__cxa_end_catch()
to label %invoke.cont2 unwind label %terminate.lpad, !dbg !20
invoke.cont2: ; preds = %lpad1
br label %eh.resume, !dbg !20
try.cont: ; preds = %invoke.cont
ret void, !dbg !21
eh.resume: ; preds = %invoke.cont2
%exn3 = load i8*, i8** %exn.slot, align 8, !dbg !20
%sel = load i32, i32* %ehselector.slot, align 4, !dbg !20
%lpad.val = insertvalue { i8*, i32 } undef, i8* %exn3, 0, !dbg !20
%lpad.val4 = insertvalue { i8*, i32 } %lpad.val, i32 %sel, 1, !dbg !20
resume { i8*, i32 } %lpad.val4, !dbg !20
terminate.lpad: ; preds = %lpad1
%7 = landingpad { i8*, i32 }
catch i8* null, !dbg !20
%8 = extractvalue { i8*, i32 } %7, 0, !dbg !20
call void @__clang_call_terminate(i8* %8) #6, !dbg !20
unreachable, !dbg !20
unreachable: ; preds = %catch
unreachable
}
; Function Attrs: nounwind
declare void @_Z12bar_noexceptv() #1
declare void @_Z3barv() #2
declare i32 @__gxx_personality_v0(...)
declare i8* @__cxa_begin_catch(i8*)
declare void @__cxa_rethrow()
declare void @__cxa_end_catch()
; Function Attrs: noinline noreturn nounwind
; Helper called from %terminate.lpad in @_Z3foov: it begins the catch for the
; incoming exception pointer and then calls @_ZSt9terminatev. None of its
; instructions carry a !dbg attachment.
define linkonce_odr hidden void @__clang_call_terminate(i8*) #3 {
%2 = call i8* @__cxa_begin_catch(i8* %0) #4
call void @_ZSt9terminatev() #6
unreachable
}
declare void @_ZSt9terminatev()
attributes #0 = { ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #3 = { noinline noreturn nounwind }
attributes #4 = { nounwind }
attributes #5 = { noreturn }
attributes #6 = { noreturn nounwind }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4, !5, !6}
!llvm.ident = !{!7}
; CHECK: ![[CALL1]] = !DILocation(line: 7, column: 5, scope: ![[SCOPE1:[0-9]+]])
; CHECK: ![[SCOPE1]] = distinct !DILexicalBlock(scope: !8, file: !1, line: 6, column: 7)
; CHECK: ![[CALL2]] = !DILocation(line: 7, column: 21, scope: ![[SCOPE2:[0-9]+]])
; CHECK: ![[SCOPE2]] = !DILexicalBlockFile(scope: ![[SCOPE1]], file: !1, discriminator: 2)
; CHECK: ![[INVOKE]] = !DILocation(line: 7, column: 37, scope: ![[SCOPE3:[0-9]+]])
; CHECK: ![[SCOPE3]] = !DILexicalBlockFile(scope: ![[SCOPE1]], file: !1, discriminator: 4)
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 8.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, nameTableKind: GNU)
!1 = !DIFile(filename: "invoke.cpp", directory: "examples")
!2 = !{}
!3 = !{i32 2, !"Dwarf Version", i32 4}
!4 = !{i32 2, !"Debug Info Version", i32 3}
!5 = !{i32 1, !"wchar_size", i32 4}
!6 = !{i32 7, !"PIC Level", i32 2}
!7 = !{!"clang version 8.0.0"}
!8 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !1, file: !1, line: 5, type: !9, scopeLine: 5, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
!9 = !DISubroutineType(types: !10)
!10 = !{null}
!11 = !DILocation(line: 7, column: 5, scope: !12)
!12 = distinct !DILexicalBlock(scope: !8, file: !1, line: 6, column: 7)
!13 = !DILocation(line: 7, column: 21, scope: !12)
!14 = !DILocation(line: 7, column: 37, scope: !12)
!15 = !DILocation(line: 8, column: 3, scope: !12)
!16 = !DILocation(line: 12, column: 1, scope: !12)
!17 = !DILocation(line: 10, column: 5, scope: !18)
!18 = distinct !DILexicalBlock(scope: !8, file: !1, line: 9, column: 15)
!19 = !DILocation(line: 12, column: 1, scope: !18)
!20 = !DILocation(line: 11, column: 3, scope: !18)
!21 = !DILocation(line: 12, column: 1, scope: !8)

View File

@ -0,0 +1,104 @@
; RUN: opt < %s -add-discriminators -sroa -S | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; Test case obtained from the following C code:
; struct A {
; int field1;
; short field2;
; };
;
; struct B {
; struct A field1;
; int field2;
; };
;
;
; extern struct B g_b;
; extern int bar(struct B b, int c);
;
; int foo(int cond) {
; int result = cond ? bar(g_b, 33) : 42;
; return result;
; }
; In this test, global variable g_b is passed by copy to function bar. That
; copy is located on the stack (see alloca %g_b.coerce), and it is initialized
; by a memcpy call.
;
; SROA would split alloca %g_b.coerce into two (smaller disjoint) slices:
; slice [0,8) and slice [8, 12). Users of the original alloca are rewritten
; as users of the new alloca slices.
; In particular, the memcpy is rewritten by SROA as two load/store pairs.
;
; Later on, mem2reg successfully promotes the new alloca slices to registers,
; and loads %3 and %5 are made redundant by the loads obtained from the memcpy
; intrinsic expansion.
;
; If pass AddDiscriminators doesn't assign a discriminator to the intrinsic
; memcpy call, then the loads obtained from the memcpy expansion would not have
; a correct discriminator.
;
; This test checks that the two new loads inserted by SROA in %cond.true
; correctly reference a debug location with a non-zero discriminator. This test
; also checks that the same discriminator is used by all instructions from
; basic block %cond.true.
%struct.B = type { %struct.A, i32 }
%struct.A = type { i32, i16 }
@g_b = external global %struct.B, align 4
; foo(cond): when %cond is non-zero, copies @g_b into the stack slot
; %g_b.coerce, reloads it as an i64/i32 pair and passes that pair plus the
; constant 33 to @bar; otherwise the result is the constant 42.
define i32 @foo(i32 %cond) #0 !dbg !5 {
entry:
%g_b.coerce = alloca { i64, i32 }, align 4
%tobool = icmp ne i32 %cond, 0, !dbg !7
br i1 %tobool, label %cond.true, label %cond.end, !dbg !7
cond.true:
; Expected output for this block: an i64 load, an i32 load and the call to
; @bar all sharing one debug location, followed by the branch. Both the shared
; location and the branch location must be scoped by the same
; DILexicalBlockFile carrying discriminator 2.
; CHECK-LABEL: cond.true:
; CHECK: load i64, {{.*}}, !dbg ![[LOC:[0-9]+]]
; CHECK-NEXT: load i32, {{.*}}, !dbg ![[LOC]]
; CHECK-NEXT: %call = call i32 @bar({{.*}}), !dbg ![[LOC]]
; CHECK-NEXT: br label %cond.end, !dbg ![[BR_LOC:[0-9]+]]
; CHECK-DAG: ![[LOC]] = !DILocation(line: 16, column: 23, scope: ![[SCOPE:[0-9]+]])
; CHECK-DAG: ![[SCOPE]] = !DILexicalBlockFile({{.*}}, discriminator: 2)
; CHECK-DAG: ![[BR_LOC]] = !DILocation(line: 16, column: 16, scope: ![[SCOPE]])
; Copy the 12 bytes of @g_b into the alloca through i8* views of both objects.
%0 = bitcast { i64, i32 }* %g_b.coerce to i8*, !dbg !8
%1 = bitcast %struct.B* @g_b to i8*, !dbg !8
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 %1, i64 12, i1 false), !dbg !8
; Reload the copied bytes as the i64 and i32 members of the alloca.
%2 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %g_b.coerce, i32 0, i32 0, !dbg !8
%3 = load i64, i64* %2, align 4, !dbg !8
%4 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %g_b.coerce, i32 0, i32 1, !dbg !8
%5 = load i32, i32* %4, align 4, !dbg !8
%call = call i32 @bar(i64 %3, i32 %5, i32 33), !dbg !8
br label %cond.end, !dbg !7
cond.end: ; preds = %entry, %cond.true
; Merge point: %call when arriving from %cond.true, 42 when arriving from %entry.
%cond1 = phi i32 [ %call, %cond.true ], [ 42, %entry ], !dbg !7
ret i32 %cond1, !dbg !9
}
declare i32 @bar(i64, i32, i32)
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) #1
attributes #0 = { noinline nounwind uwtable }
attributes #1 = { argmemonly nounwind }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4}
!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, isOptimized: false, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2)
!1 = !DIFile(filename: "test.c", directory: ".")
!2 = !{}
!3 = !{i32 2, !"Dwarf Version", i32 4}
!4 = !{i32 2, !"Debug Info Version", i32 3}
!5 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 15, type: !6, isLocal: false, isDefinition: true, scopeLine: 15, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2)
!6 = !DISubroutineType(types: !2)
!7 = !DILocation(line: 16, column: 16, scope: !5)
!8 = !DILocation(line: 16, column: 23, scope: !5)
!9 = !DILocation(line: 17, column: 3, scope: !5)

View File

@ -0,0 +1,72 @@
; RUN: opt < %s -add-discriminators -S | FileCheck %s
; RUN: opt < %s -passes=add-discriminators -S | FileCheck %s
; Discriminator support for multiple CFG paths on the same line.
;
; void foo(int i) {
; int x;
; if (i < 10) x = i; else x = -i;
; }
;
; The two stores inside the if-then-else line must have different discriminator
; values.
; foo(i): spills %i to %i.addr, then branches on i < 10. The if.then block
; stores i into %x and the if.else block stores 0 - i into %x. Every
; instruction with a !dbg attachment in the three conditional blocks uses the
; same location (!10), so the pass has to separate the two arms by
; discriminator: the expectations bind THEN for if.then and ELSE for if.else.
define void @foo(i32 %i) #0 !dbg !4 {
entry:
%i.addr = alloca i32, align 4
%x = alloca i32, align 4
store i32 %i, i32* %i.addr, align 4
%0 = load i32, i32* %i.addr, align 4, !dbg !10
%cmp = icmp slt i32 %0, 10, !dbg !10
br i1 %cmp, label %if.then, label %if.else, !dbg !10
if.then: ; preds = %entry
%1 = load i32, i32* %i.addr, align 4, !dbg !10
; CHECK: %1 = load i32, i32* %i.addr, align 4, !dbg ![[THEN:[0-9]+]]
store i32 %1, i32* %x, align 4, !dbg !10
; CHECK: store i32 %1, i32* %x, align 4, !dbg ![[THEN]]
br label %if.end, !dbg !10
; CHECK: br label %if.end, !dbg ![[THEN]]
if.else: ; preds = %entry
%2 = load i32, i32* %i.addr, align 4, !dbg !10
; CHECK: %2 = load i32, i32* %i.addr, align 4, !dbg ![[ELSE:[0-9]+]]
%sub = sub nsw i32 0, %2, !dbg !10
; CHECK: %sub = sub nsw i32 0, %2, !dbg ![[ELSE]]
store i32 %sub, i32* %x, align 4, !dbg !10
; CHECK: store i32 %sub, i32* %x, align 4, !dbg ![[ELSE]]
; Unlike the branch in if.then, this branch carries no !dbg attachment and has
; no expectation of its own.
br label %if.end
if.end: ; preds = %if.else, %if.then
ret void, !dbg !12
}
attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!7, !8}
!llvm.ident = !{!9}
!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5 (trunk 199750) (llvm/trunk 199751)", isOptimized: false, emissionKind: NoDebug, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
!1 = !DIFile(filename: "multiple.c", directory: ".")
!2 = !{}
!4 = distinct !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 1, file: !1, scope: !5, type: !6, retainedNodes: !2)
!5 = !DIFile(filename: "multiple.c", directory: ".")
!6 = !DISubroutineType(types: !{null, !13})
!13 = !DIBasicType(encoding: DW_ATE_signed, name: "int", size: 32, align: 32)
!7 = !{i32 2, !"Dwarf Version", i32 4}
!8 = !{i32 1, !"Debug Info Version", i32 3}
!9 = !{!"clang version 3.5 (trunk 199750) (llvm/trunk 199751)"}
!10 = !DILocation(line: 3, scope: !11)
!11 = distinct !DILexicalBlock(line: 3, column: 0, file: !1, scope: !4)
!12 = !DILocation(line: 4, scope: !4)
; CHECK: ![[THEN]] = !DILocation(line: 3, scope: ![[THENBLOCK:[0-9]+]])
; CHECK: ![[THENBLOCK]] = !DILexicalBlockFile(scope: ![[SCOPE:[0-9]+]],{{.*}} discriminator: 2)
; CHECK: ![[ELSE]] = !DILocation(line: 3, scope: ![[ELSEBLOCK:[0-9]+]])
; CHECK: ![[ELSEBLOCK]] = !DILexicalBlockFile(scope: ![[SCOPE]],{{.*}} discriminator: 4)

View File

@ -0,0 +1,76 @@
; RUN: opt < %s -add-discriminators -S | FileCheck %s
; RUN: opt < %s -passes=add-discriminators -S | FileCheck %s
; We should not generate discriminators for DWARF versions prior to 4.
;
; Original code:
;
; int foo(long i) {
; if (i < 5) return 2; else return 90;
; }
;
; None of the !dbg nodes associated with the if() statement should be
; altered. If they are, it means that the discriminators pass added a
; new lexical scope.
; foo(i): returns 2 when i < 5 and 90 otherwise, going through the %retval
; stack slot. The module flags of this file declare "Dwarf Version" 2, and the
; expectations require the load, compare and branch of the entry block to keep
; a location whose scope is the original distinct DILexicalBlock.
; NOTE(review): ENTRY is re-defined (with the ":[0-9]+" pattern) on each of the
; three expectations below instead of being defined once and reused, so the
; three lines are presumably not forced to share one location id - confirm
; whether reuse was intended.
define i32 @foo(i64 %i) #0 !dbg !4 {
entry:
%retval = alloca i32, align 4
%i.addr = alloca i64, align 8
store i64 %i, i64* %i.addr, align 8
call void @llvm.dbg.declare(metadata i64* %i.addr, metadata !13, metadata !DIExpression()), !dbg !14
%0 = load i64, i64* %i.addr, align 8, !dbg !15
; CHECK: %0 = load i64, i64* %i.addr, align 8, !dbg ![[ENTRY:[0-9]+]]
%cmp = icmp slt i64 %0, 5, !dbg !15
; CHECK: %cmp = icmp slt i64 %0, 5, !dbg ![[ENTRY:[0-9]+]]
br i1 %cmp, label %if.then, label %if.else, !dbg !15
; CHECK: br i1 %cmp, label %if.then, label %if.else, !dbg ![[ENTRY:[0-9]+]]
if.then: ; preds = %entry
store i32 2, i32* %retval, !dbg !15
br label %return, !dbg !15
if.else: ; preds = %entry
store i32 90, i32* %retval, !dbg !15
br label %return, !dbg !15
return: ; preds = %if.else, %if.then
; Both arms funnel through %retval; reload it and return.
%1 = load i32, i32* %retval, !dbg !17
ret i32 %1, !dbg !17
}
; Function Attrs: nounwind readnone
declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
; We should be able to add discriminators even in the absence of llvm.dbg.cu.
; When using sample profiles, the front end will generate line tables but it
; does not generate llvm.dbg.cu to prevent codegen from emitting debug info
; to the final binary.
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!10, !11}
!llvm.ident = !{!12}
; CHECK: !{i32 2, !"Dwarf Version", i32 2}
!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: FullDebug, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
!1 = !DIFile(filename: "no-discriminators", directory: ".")
!2 = !{}
!4 = distinct !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 1, file: !1, scope: !5, type: !6, retainedNodes: !2)
; CHECK: ![[FOO:[0-9]+]] = distinct !DISubprogram(name: "foo"
!5 = !DIFile(filename: "no-discriminators", directory: ".")
!6 = !DISubroutineType(types: !7)
!7 = !{!8, !9}
!8 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!9 = !DIBasicType(tag: DW_TAG_base_type, name: "long int", size: 64, align: 64, encoding: DW_ATE_signed)
!10 = !{i32 2, !"Dwarf Version", i32 2}
!11 = !{i32 1, !"Debug Info Version", i32 3}
!12 = !{!"clang version 3.5.0 "}
!13 = !DILocalVariable(name: "i", line: 1, arg: 1, scope: !4, file: !5, type: !9)
!14 = !DILocation(line: 1, scope: !4)
!15 = !DILocation(line: 2, scope: !16)
; CHECK: ![[ENTRY]] = !DILocation(line: 2, scope: ![[BLOCK:[0-9]+]])
!16 = distinct !DILexicalBlock(line: 2, column: 0, file: !1, scope: !4)
; CHECK: ![[BLOCK]] = distinct !DILexicalBlock(scope: ![[FOO]],{{.*}} line: 2)
!17 = !DILocation(line: 3, scope: !4)

View File

@ -0,0 +1,101 @@
; RUN: opt < %s -add-discriminators -S | FileCheck %s
; RUN: opt < %s -passes=add-discriminators -S | FileCheck %s
; Discriminator support for code that is written in one line:
; #1 int foo(int i) {
; #2 if (i == 3 || i == 5) return 100; else return 99;
; #3 }
; i == 3: discriminator 0
; i == 5: discriminator 2
; return 100: discriminator 4
; return 99: discriminator 6
; foo(i): returns 100 when i is 3 or 5 and 99 otherwise, going through the
; stack slot %1. The basic blocks are unnamed; the "<label>" comments give
; their implicit numbers (%0 is the entry block).
; Per the expectations at the end of the file: block %5 gets discriminator 2
; (its branch is scoped directly by the subprogram and so gets a separate
; DILexicalBlockFile, also with discriminator 2), block %8 gets discriminator 4
; and block %9 gets discriminator 6. Nothing is expected of blocks %0 and %10.
define i32 @_Z3fooi(i32 %i) #0 !dbg !4 {
%1 = alloca i32, align 4
%2 = alloca i32, align 4
store i32 %i, i32* %2, align 4, !tbaa !13
call void @llvm.dbg.declare(metadata i32* %2, metadata !9, metadata !17), !dbg !18
; First half of the condition: i == 3 jumps straight to the "return 100" block.
%3 = load i32, i32* %2, align 4, !dbg !19, !tbaa !13
%4 = icmp eq i32 %3, 3, !dbg !21
br i1 %4, label %8, label %5, !dbg !22
; <label>:5 ; preds = %0
; Second half of the condition: i == 5.
%6 = load i32, i32* %2, align 4, !dbg !23, !tbaa !13
; CHECK: %6 = load i32, i32* %2, align 4, !dbg ![[THEN1:[0-9]+]],{{.*}}
%7 = icmp eq i32 %6, 5, !dbg !24
; CHECK: %7 = icmp eq i32 %6, 5, !dbg ![[THEN2:[0-9]+]]
br i1 %7, label %8, label %9, !dbg !25
; CHECK: br i1 %7, label %8, label %9, !dbg ![[THEN3:[0-9]+]]
; <label>:8 ; preds = %5, %0
; "return 100" arm.
store i32 100, i32* %1, align 4, !dbg !26
; CHECK: store i32 100, i32* %1, align 4, !dbg ![[ELSE:[0-9]+]]
br label %10, !dbg !26
; CHECK: br label %10, !dbg ![[ELSE]]
; <label>:9 ; preds = %5
; "return 99" arm.
store i32 99, i32* %1, align 4, !dbg !27
; CHECK: store i32 99, i32* %1, align 4, !dbg ![[COMBINE:[0-9]+]]
br label %10, !dbg !27
; CHECK: br label %10, !dbg ![[COMBINE]]
; <label>:10 ; preds = %9, %8
%11 = load i32, i32* %1, align 4, !dbg !28
ret i32 %11, !dbg !28
}
; Function Attrs: nounwind readnone
declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!10, !11}
!llvm.ident = !{!12}
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 250915)", isOptimized: true, runtimeVersion: 0, emissionKind: NoDebug, enums: !2)
!1 = !DIFile(filename: "a.cc", directory: "/usr/local/google/home/dehao/discr")
!2 = !{}
!4 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fooi", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !8)
!5 = !DISubroutineType(types: !6)
!6 = !{!7, !7}
!7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!8 = !{!9}
!9 = !DILocalVariable(name: "i", arg: 1, scope: !4, file: !1, line: 1, type: !7)
!10 = !{i32 2, !"Dwarf Version", i32 4}
!11 = !{i32 2, !"Debug Info Version", i32 3}
!12 = !{!"clang version 3.8.0 (trunk 250915)"}
!13 = !{!14, !14, i64 0}
!14 = !{!"int", !15, i64 0}
!15 = !{!"omnipotent char", !16, i64 0}
!16 = !{!"Simple C/C++ TBAA"}
!17 = !DIExpression()
!18 = !DILocation(line: 1, column: 13, scope: !4)
!19 = !DILocation(line: 2, column: 7, scope: !20)
!20 = distinct !DILexicalBlock(scope: !4, file: !1, line: 2, column: 7)
!21 = !DILocation(line: 2, column: 9, scope: !20)
!22 = !DILocation(line: 2, column: 14, scope: !20)
!23 = !DILocation(line: 2, column: 17, scope: !20)
!24 = !DILocation(line: 2, column: 19, scope: !20)
!25 = !DILocation(line: 2, column: 7, scope: !4)
!26 = !DILocation(line: 2, column: 25, scope: !20)
!27 = !DILocation(line: 2, column: 42, scope: !20)
!28 = !DILocation(line: 3, column: 1, scope: !4)
; CHECK: ![[F:.*]] = distinct !DISubprogram(name: "foo",
; CHECK: ![[IF:.*]] = distinct !DILexicalBlock(scope: ![[F]],{{.*}}line: 2, column: 7)
; CHECK: ![[THEN1]] = !DILocation(line: 2, column: 17, scope: ![[THENBLOCK:[0-9]+]])
; CHECK: ![[THENBLOCK]] = !DILexicalBlockFile(scope: ![[IF]],{{.*}} discriminator: 2)
; CHECK: ![[THEN2]] = !DILocation(line: 2, column: 19, scope: ![[THENBLOCK]])
; CHECK: ![[THEN3]] = !DILocation(line: 2, column: 7, scope: ![[BRBLOCK:[0-9]+]])
; CHECK: ![[BRBLOCK]] = !DILexicalBlockFile(scope: ![[F]],{{.*}} discriminator: 2)
; CHECK: ![[ELSE]] = !DILocation(line: 2, column: 25, scope: ![[ELSEBLOCK:[0-9]+]])
; CHECK: ![[ELSEBLOCK]] = !DILexicalBlockFile(scope: ![[IF]],{{.*}} discriminator: 4)
; CHECK: ![[COMBINE]] = !DILocation(line: 2, column: 42, scope: ![[COMBINEBLOCK:[0-9]+]])
; CHECK: ![[COMBINEBLOCK]] = !DILexicalBlockFile(scope: ![[IF]],{{.*}} discriminator: 6)

View File

@ -0,0 +1,235 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -aggressive-instcombine -S | FileCheck %s
; PR37098 - https://bugs.llvm.org/show_bug.cgi?id=37098
; Any-bit-set test over two bits: ((x >> 3) | x) & 1 is 1 when bit 0 or bit 3
; of %x is set. Expected result: a single mask-and-compare,
; zext((x & 9) != 0), where 9 = (1 << 0) | (1 << 3).
define i32 @anyset_two_bit_mask(i32 %x) {
; CHECK-LABEL: @anyset_two_bit_mask(
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 9
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
; CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
; CHECK-NEXT: ret i32 [[TMP3]]
;
%s = lshr i32 %x, 3
%o = or i32 %s, %x
%r = and i32 %o, 1
ret i32 %r
}
; Any-bit-set test over four bits: %x itself and %x shifted right by 3, 5 and 8
; are or'ed together as a tree (two pairs, then the pair results) and masked
; with 1. Expected result: zext((x & 297) != 0), where
; 297 = (1 << 0) | (1 << 3) | (1 << 5) | (1 << 8).
define i32 @anyset_four_bit_mask(i32 %x) {
; CHECK-LABEL: @anyset_four_bit_mask(
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 297
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
; CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
; CHECK-NEXT: ret i32 [[TMP3]]
;
%t1 = lshr i32 %x, 3
%t2 = lshr i32 %x, 5
%t3 = lshr i32 %x, 8
%o1 = or i32 %t1, %x
%o2 = or i32 %t2, %t3
%o3 = or i32 %o1, %o2
%r = and i32 %o3, 1
ret i32 %r
}
; We're not testing the LSB here, so all of the 'or' operands are shifts.
; Any-bit-set test where every 'or' operand is a shift (by 3, 5 and 8); the
; unshifted %x is not part of the 'or' chain. Expected result:
; zext((x & 296) != 0), where 296 = (1 << 3) | (1 << 5) | (1 << 8).
define i32 @anyset_three_bit_mask_all_shifted_bits(i32 %x) {
; CHECK-LABEL: @anyset_three_bit_mask_all_shifted_bits(
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 296
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
; CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
; CHECK-NEXT: ret i32 [[TMP3]]
;
%t1 = lshr i32 %x, 3
%t2 = lshr i32 %x, 5
%t3 = lshr i32 %x, 8
%o2 = or i32 %t2, %t3
%o3 = or i32 %t1, %o2
%r = and i32 %o3, 1
ret i32 %r
}
; Recognize the 'and' sibling pattern (all-bits-set). The 'and 1' may not be at the end.
; All-bits-set test over two bits: ((x >> 7) & x) & 1 is 1 only when both bit 0
; and bit 7 of %x are set. Expected result: zext((x & 129) == 129), where
; 129 = (1 << 0) | (1 << 7).
define i32 @allset_two_bit_mask(i32 %x) {
; CHECK-LABEL: @allset_two_bit_mask(
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 129
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 129
; CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
; CHECK-NEXT: ret i32 [[TMP3]]
;
%s = lshr i32 %x, 7
%o = and i32 %s, %x
%r = and i32 %o, 1
ret i32 %r
}
; All-bits-set test over bits 1..4 of an i64. Here the 'and 1' mask is the
; first operation of the chain (applied to %t4) rather than the last one.
; Expected result: zext((x & 30) == 30), where
; 30 = (1 << 1) | (1 << 2) | (1 << 3) | (1 << 4).
define i64 @allset_four_bit_mask(i64 %x) {
; CHECK-LABEL: @allset_four_bit_mask(
; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[X:%.*]], 30
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 30
; CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i64
; CHECK-NEXT: ret i64 [[TMP3]]
;
%t1 = lshr i64 %x, 1
%t2 = lshr i64 %x, 2
%t3 = lshr i64 %x, 3
%t4 = lshr i64 %x, 4
%a1 = and i64 %t4, 1
%a2 = and i64 %t2, %a1
%a3 = and i64 %a2, %t1
%r = and i64 %a3, %t3
ret i64 %r
}
declare void @use(i32)
; negative test - extra use means the transform would increase instruction count
; Same shape as the two-bit all-set pattern (shift by 7), but the intermediate
; 'and' result %o is also passed to @use. The expected output is the input
; unchanged.
define i32 @allset_two_bit_mask_multiuse(i32 %x) {
; CHECK-LABEL: @allset_two_bit_mask_multiuse(
; CHECK-NEXT: [[S:%.*]] = lshr i32 [[X:%.*]], 7
; CHECK-NEXT: [[O:%.*]] = and i32 [[S]], [[X]]
; CHECK-NEXT: [[R:%.*]] = and i32 [[O]], 1
; CHECK-NEXT: call void @use(i32 [[O]])
; CHECK-NEXT: ret i32 [[R]]
;
%s = lshr i32 %x, 7
%o = and i32 %s, %x
%r = and i32 %o, 1
; Second use of %o: this is what makes the test negative.
call void @use(i32 %o)
ret i32 %r
}
; negative test - missing 'and 1' mask, so more than the low bit is used here
; An i8 'and' chain over %x shifted right by 1, 2 and 3 with no 'and 1'
; anywhere in the chain, so the returned value is not limited to the low bit.
; The expected output is the input unchanged.
define i8 @allset_three_bit_mask_no_and1(i8 %x) {
; CHECK-LABEL: @allset_three_bit_mask_no_and1(
; CHECK-NEXT: [[T1:%.*]] = lshr i8 [[X:%.*]], 1
; CHECK-NEXT: [[T2:%.*]] = lshr i8 [[X]], 2
; CHECK-NEXT: [[T3:%.*]] = lshr i8 [[X]], 3
; CHECK-NEXT: [[A2:%.*]] = and i8 [[T1]], [[T2]]
; CHECK-NEXT: [[R:%.*]] = and i8 [[A2]], [[T3]]
; CHECK-NEXT: ret i8 [[R]]
;
%t1 = lshr i8 %x, 1
%t2 = lshr i8 %x, 2
%t3 = lshr i8 %x, 3
%a2 = and i8 %t1, %t2
%r = and i8 %a2, %t3
ret i8 %r
}
; This test demonstrates that the transform can be large. If the implementation
; is slow or explosive (stack overflow due to recursion), it should be made efficient.
; All-bits-set test over bits 1..40 of an i64, written as 40 shifts feeding a
; linear chain of 40 'and' instructions that starts with 'and %t1, 1'.
; Expected result: zext((x & 2199023255550) == 2199023255550), where
; 2199023255550 = 2^41 - 2, i.e. bits 1 through 40 set.
define i64 @allset_40_bit_mask(i64 %x) {
; CHECK-LABEL: @allset_40_bit_mask(
; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[X:%.*]], 2199023255550
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 2199023255550
; CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i64
; CHECK-NEXT: ret i64 [[TMP3]]
;
; %tN moves bit N of %x down to bit 0.
%t1 = lshr i64 %x, 1
%t2 = lshr i64 %x, 2
%t3 = lshr i64 %x, 3
%t4 = lshr i64 %x, 4
%t5 = lshr i64 %x, 5
%t6 = lshr i64 %x, 6
%t7 = lshr i64 %x, 7
%t8 = lshr i64 %x, 8
%t9 = lshr i64 %x, 9
%t10 = lshr i64 %x, 10
%t11 = lshr i64 %x, 11
%t12 = lshr i64 %x, 12
%t13 = lshr i64 %x, 13
%t14 = lshr i64 %x, 14
%t15 = lshr i64 %x, 15
%t16 = lshr i64 %x, 16
%t17 = lshr i64 %x, 17
%t18 = lshr i64 %x, 18
%t19 = lshr i64 %x, 19
%t20 = lshr i64 %x, 20
%t21 = lshr i64 %x, 21
%t22 = lshr i64 %x, 22
%t23 = lshr i64 %x, 23
%t24 = lshr i64 %x, 24
%t25 = lshr i64 %x, 25
%t26 = lshr i64 %x, 26
%t27 = lshr i64 %x, 27
%t28 = lshr i64 %x, 28
%t29 = lshr i64 %x, 29
%t30 = lshr i64 %x, 30
%t31 = lshr i64 %x, 31
%t32 = lshr i64 %x, 32
%t33 = lshr i64 %x, 33
%t34 = lshr i64 %x, 34
%t35 = lshr i64 %x, 35
%t36 = lshr i64 %x, 36
%t37 = lshr i64 %x, 37
%t38 = lshr i64 %x, 38
%t39 = lshr i64 %x, 39
%t40 = lshr i64 %x, 40
; %aN is the 'and' of %tN with the previous link; %a1 applies the mask of 1.
%a1 = and i64 %t1, 1
%a2 = and i64 %t2, %a1
%a3 = and i64 %t3, %a2
%a4 = and i64 %t4, %a3
%a5 = and i64 %t5, %a4
%a6 = and i64 %t6, %a5
%a7 = and i64 %t7, %a6
%a8 = and i64 %t8, %a7
%a9 = and i64 %t9, %a8
%a10 = and i64 %t10, %a9
%a11 = and i64 %t11, %a10
%a12 = and i64 %t12, %a11
%a13 = and i64 %t13, %a12
%a14 = and i64 %t14, %a13
%a15 = and i64 %t15, %a14
%a16 = and i64 %t16, %a15
%a17 = and i64 %t17, %a16
%a18 = and i64 %t18, %a17
%a19 = and i64 %t19, %a18
%a20 = and i64 %t20, %a19
%a21 = and i64 %t21, %a20
%a22 = and i64 %t22, %a21
%a23 = and i64 %t23, %a22
%a24 = and i64 %t24, %a23
%a25 = and i64 %t25, %a24
%a26 = and i64 %t26, %a25
%a27 = and i64 %t27, %a26
%a28 = and i64 %t28, %a27
%a29 = and i64 %t29, %a28
%a30 = and i64 %t30, %a29
%a31 = and i64 %t31, %a30
%a32 = and i64 %t32, %a31
%a33 = and i64 %t33, %a32
%a34 = and i64 %t34, %a33
%a35 = and i64 %t35, %a34
%a36 = and i64 %t36, %a35
%a37 = and i64 %t37, %a36
%a38 = and i64 %t38, %a37
%a39 = and i64 %t39, %a38
%a40 = and i64 %t40, %a39
ret i64 %a40
}
; Verify that unsimplified code doesn't crash:
; https://bugs.llvm.org/show_bug.cgi?id=37446
; Regression input for PR37446: the 'and' chain starts from a shift of the
; constant 1 by 33, a shift amount larger than the 32-bit width of the type.
; The expected output is the input unchanged.
define i32 @PR37446(i32 %x) {
; CHECK-LABEL: @PR37446(
; CHECK-NEXT: [[SHR:%.*]] = lshr i32 1, 33
; CHECK-NEXT: [[AND:%.*]] = and i32 [[SHR]], 15
; CHECK-NEXT: [[AND1:%.*]] = and i32 [[AND]], [[X:%.*]]
; CHECK-NEXT: ret i32 [[AND1]]
;
%shr = lshr i32 1, 33
%and = and i32 %shr, 15
%and1 = and i32 %and, %x
ret i32 %and1
}

View File

@ -0,0 +1,476 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -aggressive-instcombine -S | FileCheck %s
; https://bugs.llvm.org/show_bug.cgi?id=34924
; Guarded rotate-left: when %b is 0 the result is %a, otherwise it is
; (a >> (32 - b)) | (a << b). Expected result: the shift/or sequence in %rotbb
; disappears and the phi in %end is replaced by a call to
; llvm.fshl.i32(a, a, b); the compare and both branches remain.
define i32 @rotl(i32 %a, i32 %b) {
; CHECK-LABEL: @rotl(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
; CHECK: rotbb:
; CHECK-NEXT: br label [[END]]
; CHECK: end:
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.fshl.i32(i32 [[A:%.*]], i32 [[A]], i32 [[B]])
; CHECK-NEXT: ret i32 [[TMP0]]
;
entry:
%cmp = icmp eq i32 %b, 0
br i1 %cmp, label %end, label %rotbb
rotbb:
%sub = sub i32 32, %b
%shr = lshr i32 %a, %sub
%shl = shl i32 %a, %b
%or = or i32 %shr, %shl
br label %end
end:
%cond = phi i32 [ %or, %rotbb ], [ %a, %entry ]
ret i32 %cond
}
; Guarded rotate-left with the phi incoming values listed in the opposite order
; (the %entry value first, then the %rotbb value). Expected result: the same
; llvm.fshl.i32(a, a, b) call as for the non-commuted form.
define i32 @rotl_commute_phi(i32 %a, i32 %b) {
; CHECK-LABEL: @rotl_commute_phi(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
; CHECK: rotbb:
; CHECK-NEXT: br label [[END]]
; CHECK: end:
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.fshl.i32(i32 [[A:%.*]], i32 [[A]], i32 [[B]])
; CHECK-NEXT: ret i32 [[TMP0]]
;
entry:
%cmp = icmp eq i32 %b, 0
br i1 %cmp, label %end, label %rotbb
rotbb:
%sub = sub i32 32, %b
%shr = lshr i32 %a, %sub
%shl = shl i32 %a, %b
%or = or i32 %shr, %shl
br label %end
end:
%cond = phi i32 [ %a, %entry ], [ %or, %rotbb ]
ret i32 %cond
}
; Guarded rotate-left with the 'or' operands commuted (%shl first, %shr second)
; and the %entry value listed first in the phi. Expected result: the same
; llvm.fshl.i32(a, a, b) call.
define i32 @rotl_commute_or(i32 %a, i32 %b) {
; CHECK-LABEL: @rotl_commute_or(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
; CHECK: rotbb:
; CHECK-NEXT: br label [[END]]
; CHECK: end:
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.fshl.i32(i32 [[A:%.*]], i32 [[A]], i32 [[B]])
; CHECK-NEXT: ret i32 [[TMP0]]
;
entry:
%cmp = icmp eq i32 %b, 0
br i1 %cmp, label %end, label %rotbb
rotbb:
%sub = sub i32 32, %b
%shr = lshr i32 %a, %sub
%shl = shl i32 %a, %b
%or = or i32 %shl, %shr
br label %end
end:
%cond = phi i32 [ %a, %entry ], [ %or, %rotbb ]
ret i32 %cond
}
; Verify that the intrinsic is inserted into a valid position.
; Guarded rotate-left where the %end block holds a second, unrelated phi
; (%other) after the rotate phi. Expected result: the llvm.fshl.i32 call that
; replaces %cond is emitted after the remaining phi %other, and the final 'or'
; uses the call result.
define i32 @rotl_insert_valid_location(i32 %a, i32 %b) {
; CHECK-LABEL: @rotl_insert_valid_location(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
; CHECK: rotbb:
; CHECK-NEXT: br label [[END]]
; CHECK: end:
; CHECK-NEXT: [[OTHER:%.*]] = phi i32 [ 1, [[ROTBB]] ], [ 2, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.fshl.i32(i32 [[A:%.*]], i32 [[A]], i32 [[B]])
; CHECK-NEXT: [[RES:%.*]] = or i32 [[TMP0]], [[OTHER]]
; CHECK-NEXT: ret i32 [[RES]]
;
entry:
%cmp = icmp eq i32 %b, 0
br i1 %cmp, label %end, label %rotbb
rotbb:
%sub = sub i32 32, %b
%shr = lshr i32 %a, %sub
%shl = shl i32 %a, %b
%or = or i32 %shr, %shl
br label %end
end:
%cond = phi i32 [ %or, %rotbb ], [ %a, %entry ]
%other = phi i32 [ 1, %rotbb ], [ 2, %entry ]
%res = or i32 %cond, %other
ret i32 %res
}
; Guarded rotate-right: when %b is 0 the result is %a, otherwise it is
; (a >> b) | (a << (32 - b)). Expected result: the shift/or sequence in %rotbb
; disappears and the phi in %end is replaced by a call to
; llvm.fshr.i32(a, a, b).
define i32 @rotr(i32 %a, i32 %b) {
; CHECK-LABEL: @rotr(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
; CHECK: rotbb:
; CHECK-NEXT: br label [[END]]
; CHECK: end:
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.fshr.i32(i32 [[A:%.*]], i32 [[A]], i32 [[B]])
; CHECK-NEXT: ret i32 [[TMP0]]
;
entry:
%cmp = icmp eq i32 %b, 0
br i1 %cmp, label %end, label %rotbb
rotbb:
%sub = sub i32 32, %b
%shl = shl i32 %a, %sub
%shr = lshr i32 %a, %b
%or = or i32 %shr, %shl
br label %end
end:
%cond = phi i32 [ %or, %rotbb ], [ %a, %entry ]
ret i32 %cond
}
; Guarded rotate-right with the phi incoming values listed in the opposite
; order (the %entry value first, then the %rotbb value). Expected result: the
; same llvm.fshr.i32(a, a, b) call as for the non-commuted form.
define i32 @rotr_commute_phi(i32 %a, i32 %b) {
; CHECK-LABEL: @rotr_commute_phi(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
; CHECK: rotbb:
; CHECK-NEXT: br label [[END]]
; CHECK: end:
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.fshr.i32(i32 [[A:%.*]], i32 [[A]], i32 [[B]])
; CHECK-NEXT: ret i32 [[TMP0]]
;
entry:
%cmp = icmp eq i32 %b, 0
br i1 %cmp, label %end, label %rotbb
rotbb:
%sub = sub i32 32, %b
%shl = shl i32 %a, %sub
%shr = lshr i32 %a, %b
%or = or i32 %shr, %shl
br label %end
end:
%cond = phi i32 [ %a, %entry ], [ %or, %rotbb ]
ret i32 %cond
}
; Guarded rotate-right with the 'or' operands commuted (%shl first, %shr
; second) and the %entry value listed first in the phi. Expected result: the
; same llvm.fshr.i32(a, a, b) call.
define i32 @rotr_commute_or(i32 %a, i32 %b) {
; CHECK-LABEL: @rotr_commute_or(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
; CHECK: rotbb:
; CHECK-NEXT: br label [[END]]
; CHECK: end:
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.fshr.i32(i32 [[A:%.*]], i32 [[A]], i32 [[B]])
; CHECK-NEXT: ret i32 [[TMP0]]
;
entry:
%cmp = icmp eq i32 %b, 0
br i1 %cmp, label %end, label %rotbb
rotbb:
%sub = sub i32 32, %b
%shl = shl i32 %a, %sub
%shr = lshr i32 %a, %b
%or = or i32 %shl, %shr
br label %end
end:
%cond = phi i32 [ %a, %entry ], [ %or, %rotbb ]
ret i32 %cond
}
; Negative test - non-power-of-2 might require urem expansion in the backend.
; The guarded rotate-right shape on i12 values (the subtraction is from 12,
; matching the bit width). The expected output is the input unchanged: no
; funnel-shift call is formed for this type.
define i12 @could_be_rotr_weird_type(i12 %a, i12 %b) {
; CHECK-LABEL: @could_be_rotr_weird_type(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i12 [[B:%.*]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
; CHECK: rotbb:
; CHECK-NEXT: [[SUB:%.*]] = sub i12 12, [[B]]
; CHECK-NEXT: [[SHL:%.*]] = shl i12 [[A:%.*]], [[SUB]]
; CHECK-NEXT: [[SHR:%.*]] = lshr i12 [[A]], [[B]]
; CHECK-NEXT: [[OR:%.*]] = or i12 [[SHL]], [[SHR]]
; CHECK-NEXT: br label [[END]]
; CHECK: end:
; CHECK-NEXT: [[COND:%.*]] = phi i12 [ [[A]], [[ENTRY:%.*]] ], [ [[OR]], [[ROTBB]] ]
; CHECK-NEXT: ret i12 [[COND]]
;
entry:
%cmp = icmp eq i12 %b, 0
br i1 %cmp, label %end, label %rotbb
rotbb:
%sub = sub i12 12, %b
%shl = shl i12 %a, %sub
%shr = lshr i12 %a, %b
%or = or i12 %shl, %shr
br label %end
end:
%cond = phi i12 [ %a, %entry ], [ %or, %rotbb ]
ret i12 %cond
}
; Negative test - wrong phi ops.
; The rotate-right shape, except that the phi takes %b (the shift amount)
; rather than %a (the rotated value) on the edge from %entry. The expected
; output is the input unchanged.
define i32 @not_rotr_1(i32 %a, i32 %b) {
; CHECK-LABEL: @not_rotr_1(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
; CHECK: rotbb:
; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[B]]
; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A:%.*]], [[SUB]]
; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[A]], [[B]]
; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL]], [[SHR]]
; CHECK-NEXT: br label [[END]]
; CHECK: end:
; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[B]], [[ENTRY:%.*]] ], [ [[OR]], [[ROTBB]] ]
; CHECK-NEXT: ret i32 [[COND]]
;
entry:
%cmp = icmp eq i32 %b, 0
br i1 %cmp, label %end, label %rotbb
rotbb:
%sub = sub i32 32, %b
%shl = shl i32 %a, %sub
%shr = lshr i32 %a, %b
%or = or i32 %shl, %shr
br label %end
end:
; %b here, where a rotate would need %a.
%cond = phi i32 [ %b, %entry ], [ %or, %rotbb ]
ret i32 %cond
}
; Negative test - too many phi ops.
; The rotate-right shape, except that %rotbb ends in a conditional branch
; through an extra block %bogus, so the phi in %end has three incoming values
; (%a, %or and %c). The expected output is the input unchanged.
define i32 @not_rotr_2(i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: @not_rotr_2(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
; CHECK: rotbb:
; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[B]]
; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A:%.*]], [[SUB]]
; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[A]], [[B]]
; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL]], [[SHR]]
; CHECK-NEXT: [[CMP42:%.*]] = icmp ugt i32 [[OR]], 42
; CHECK-NEXT: br i1 [[CMP42]], label [[END]], label [[BOGUS:%.*]]
; CHECK: bogus:
; CHECK-NEXT: br label [[END]]
; CHECK: end:
; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[A]], [[ENTRY:%.*]] ], [ [[OR]], [[ROTBB]] ], [ [[C:%.*]], [[BOGUS]] ]
; CHECK-NEXT: ret i32 [[COND]]
;
entry:
%cmp = icmp eq i32 %b, 0
br i1 %cmp, label %end, label %rotbb
rotbb:
%sub = sub i32 32, %b
%shl = shl i32 %a, %sub
%shr = lshr i32 %a, %b
%or = or i32 %shl, %shr
%cmp42 = icmp ugt i32 %or, 42
br i1 %cmp42, label %end, label %bogus
bogus:
br label %end
end:
%cond = phi i32 [ %a, %entry ], [ %or, %rotbb ], [ %c, %bogus ]
ret i32 %cond
}
; Negative test - wrong cmp (but this should match?).
; The rotate-right shape, except that the guard compares %b with 'sle 0'
; instead of 'eq 0'. The expected output is the input unchanged.
define i32 @not_rotr_3(i32 %a, i32 %b) {
; CHECK-LABEL: @not_rotr_3(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP:%.*]] = icmp sle i32 [[B:%.*]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
; CHECK: rotbb:
; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[B]]
; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A:%.*]], [[SUB]]
; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[A]], [[B]]
; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL]], [[SHR]]
; CHECK-NEXT: br label [[END]]
; CHECK: end:
; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[A]], [[ENTRY:%.*]] ], [ [[OR]], [[ROTBB]] ]
; CHECK-NEXT: ret i32 [[COND]]
;
entry:
%cmp = icmp sle i32 %b, 0
br i1 %cmp, label %end, label %rotbb
rotbb:
%sub = sub i32 32, %b
%shl = shl i32 %a, %sub
%shr = lshr i32 %a, %b
%or = or i32 %shl, %shr
br label %end
end:
%cond = phi i32 [ %a, %entry ], [ %or, %rotbb ]
ret i32 %cond
}
; Negative test - wrong shift.
; The rotate-right shape, except that the right shift is arithmetic (ashr)
; instead of logical (lshr). The expected output is the input unchanged.
define i32 @not_rotr_4(i32 %a, i32 %b) {
; CHECK-LABEL: @not_rotr_4(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
; CHECK: rotbb:
; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[B]]
; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A:%.*]], [[SUB]]
; CHECK-NEXT: [[SHR:%.*]] = ashr i32 [[A]], [[B]]
; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL]], [[SHR]]
; CHECK-NEXT: br label [[END]]
; CHECK: end:
; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[A]], [[ENTRY:%.*]] ], [ [[OR]], [[ROTBB]] ]
; CHECK-NEXT: ret i32 [[COND]]
;
entry:
%cmp = icmp eq i32 %b, 0
br i1 %cmp, label %end, label %rotbb
rotbb:
%sub = sub i32 32, %b
%shl = shl i32 %a, %sub
%shr = ashr i32 %a, %b
%or = or i32 %shl, %shr
br label %end
end:
%cond = phi i32 [ %a, %entry ], [ %or, %rotbb ]
ret i32 %cond
}
; Negative test - wrong shift.
; The rotate-right shape, except that the left shift operates on %b while the
; right shift operates on %a, so the two shifts do not share a source value.
; The expected output is the input unchanged.
define i32 @not_rotr_5(i32 %a, i32 %b) {
; CHECK-LABEL: @not_rotr_5(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
; CHECK: rotbb:
; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[B]]
; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[B]], [[SUB]]
; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[A:%.*]], [[B]]
; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL]], [[SHR]]
; CHECK-NEXT: br label [[END]]
; CHECK: end:
; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[A]], [[ENTRY:%.*]] ], [ [[OR]], [[ROTBB]] ]
; CHECK-NEXT: ret i32 [[COND]]
;
entry:
%cmp = icmp eq i32 %b, 0
br i1 %cmp, label %end, label %rotbb
rotbb:
%sub = sub i32 32, %b
%shl = shl i32 %b, %sub
%shr = lshr i32 %a, %b
%or = or i32 %shl, %shr
br label %end
end:
%cond = phi i32 [ %a, %entry ], [ %or, %rotbb ]
ret i32 %cond
}
; Negative test - wrong sub.
; The rotate-right shape, except that the complementary shift amount is
; computed as 8 - b on an i32, not 32 - b. The expected output is the input
; unchanged.
define i32 @not_rotr_6(i32 %a, i32 %b) {
; CHECK-LABEL: @not_rotr_6(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
; CHECK: rotbb:
; CHECK-NEXT: [[SUB:%.*]] = sub i32 8, [[B]]
; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A:%.*]], [[SUB]]
; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[A]], [[B]]
; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL]], [[SHR]]
; CHECK-NEXT: br label [[END]]
; CHECK: end:
; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[A]], [[ENTRY:%.*]] ], [ [[OR]], [[ROTBB]] ]
; CHECK-NEXT: ret i32 [[COND]]
;
entry:
%cmp = icmp eq i32 %b, 0
br i1 %cmp, label %end, label %rotbb
rotbb:
%sub = sub i32 8, %b
%shl = shl i32 %a, %sub
%shr = lshr i32 %a, %b
%or = or i32 %shl, %shr
br label %end
end:
%cond = phi i32 [ %a, %entry ], [ %or, %rotbb ]
ret i32 %cond
}
; Negative test - extra use. Technically, we could transform this
; because it doesn't increase the instruction count, but we're
; being cautious not to cause a potential perf pessimization for
; targets that do not have a rotate instruction.
; The exact rotate-right shape, but the 'or' result is also stored through %p,
; giving it a use besides the phi. The expected output is the input unchanged.
define i32 @could_be_rotr(i32 %a, i32 %b, i32* %p) {
; CHECK-LABEL: @could_be_rotr(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
; CHECK: rotbb:
; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[B]]
; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A:%.*]], [[SUB]]
; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[A]], [[B]]
; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL]], [[SHR]]
; CHECK-NEXT: store i32 [[OR]], i32* [[P:%.*]]
; CHECK-NEXT: br label [[END]]
; CHECK: end:
; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[A]], [[ENTRY:%.*]] ], [ [[OR]], [[ROTBB]] ]
; CHECK-NEXT: ret i32 [[COND]]
;
entry:
%cmp = icmp eq i32 %b, 0
br i1 %cmp, label %end, label %rotbb
rotbb:
%sub = sub i32 32, %b
%shl = shl i32 %a, %sub
%shr = lshr i32 %a, %b
%or = or i32 %shl, %shr
; Extra use of %or: this is what makes the test negative.
store i32 %or, i32* %p
br label %end
end:
%cond = phi i32 [ %a, %entry ], [ %or, %rotbb ]
ret i32 %cond
}

View File

@ -0,0 +1,110 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -aggressive-instcombine -S | FileCheck %s
; RUN: opt < %s -passes=aggressive-instcombine -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
; Aggressive Instcombine should be able to reduce width of these constant
; expressions, without crashing.
declare i32 @use32(i32)
declare <2 x i32> @use32_vec(<2 x i32>)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; These tests check cases where the expression dag post-dominated by TruncInst
;; is built from constant operands only.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; i64 multiply of the constants 11 and 22 truncated to i32; the expected output
; passes the folded constant 242 straight to @use32.
define void @const_expression_mul() {
; CHECK-LABEL: @const_expression_mul(
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use32(i32 242)
; CHECK-NEXT: ret void
;
%A = mul i64 11, 22
%T = trunc i64 %A to i32
call i32 @use32(i32 %T)
ret void
}
; zext of the constant i32 33 to i64 followed by a trunc back to i32; the
; expected output passes the constant 33 straight to @use32.
define void @const_expression_zext() {
; CHECK-LABEL: @const_expression_zext(
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use32(i32 33)
; CHECK-NEXT: ret void
;
%A = zext i32 33 to i64
%T = trunc i64 %A to i32
call i32 @use32(i32 %T)
ret void
}
; A lone trunc of the constant i64 44 to i32; the expected output passes the
; constant 44 straight to @use32.
define void @const_expression_trunc() {
; CHECK-LABEL: @const_expression_trunc(
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use32(i32 44)
; CHECK-NEXT: ret void
;
%T = trunc i64 44 to i32
call i32 @use32(i32 %T)
ret void
}
; Check that we handle a constant-expression trunc instruction when it is a
; leaf of another trunc expression pattern:
; 1. %T1 is the constant expression trunc instruction.
; 2. %T2->%T1 is the trunc expression pattern we want to reduce.
define void @const_expression_trunc_leaf() {
; CHECK-LABEL: @const_expression_trunc_leaf(
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use32(i32 44)
; CHECK-NEXT: ret void
;
; Chain i64 44 -> i48 -> i32; the expected output above passes the constant 44
; directly, with neither trunc remaining.
%T1 = trunc i64 44 to i48
%T2 = trunc i48 %T1 to i32
call i32 @use32(i32 %T2)
ret void
}
; Check that we handle a zext instruction that turns into a trunc instruction.
; Notice that there are two expression patterns below:
; 1. %T2->%T1
; 2. %T1`->%A (where %T1` is the reduced node of %T1 into trunc instruction)
define void @const_expression_zext_to_trunc() {
; CHECK-LABEL: @const_expression_zext_to_trunc(
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use32(i32 44)
; CHECK-NEXT: ret void
;
; 11 + 33 computed in i64, widened to i128, then truncated to i32; the expected
; output above passes the constant 44 directly.
%A = add i64 11, 33
%T1 = zext i64 %A to i128
%T2 = trunc i128 %T1 to i32
call i32 @use32(i32 %T2)
ret void
}
; Vector form of the constant multiply test: <111, 112> * <221, 222> in i64
; lanes truncated to i32 lanes; the expected output passes the folded vector
; <24531, 24864> straight to @use32_vec.
define void @const_expression_mul_vec() {
; CHECK-LABEL: @const_expression_mul_vec(
; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @use32_vec(<2 x i32> <i32 24531, i32 24864>)
; CHECK-NEXT: ret void
;
%A = mul <2 x i64> <i64 111, i64 112>, <i64 221, i64 222>
%T = trunc <2 x i64> %A to <2 x i32>
call <2 x i32> @use32_vec(<2 x i32> %T)
ret void
}
; Vector form of the constant zext test: <331, 332> widened to i64 lanes and
; truncated back to i32 lanes; the expected output passes <331, 332> directly.
define void @const_expression_zext_vec() {
; CHECK-LABEL: @const_expression_zext_vec(
; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @use32_vec(<2 x i32> <i32 331, i32 332>)
; CHECK-NEXT: ret void
;
%A = zext <2 x i32> <i32 331, i32 332> to <2 x i64>
%T = trunc <2 x i64> %A to <2 x i32>
call <2 x i32> @use32_vec(<2 x i32> %T)
ret void
}
; Vector form of the lone constant trunc test: <551, 552> in i64 lanes
; truncated to i32 lanes; the expected output passes <551, 552> directly.
define void @const_expression_trunc_vec() {
; CHECK-LABEL: @const_expression_trunc_vec(
; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @use32_vec(<2 x i32> <i32 551, i32 552>)
; CHECK-NEXT: ret void
;
%T = trunc <2 x i64> <i64 551, i64 552> to <2 x i32>
call <2 x i32> @use32_vec(<2 x i32> %T)
ret void
}

View File

@ -0,0 +1,270 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -aggressive-instcombine -S | FileCheck %s
; RUN: opt < %s -passes=aggressive-instcombine -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
; Aggressive Instcombine should be able to reduce width of these expressions.
declare i32 @use32(i32)
declare i32 @use64(i64)
declare <2 x i32> @use32_vec(<2 x i32>)
declare <2 x i32> @use64_vec(<2 x i64>)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; These tests check cases where the expression dag post-dominated by TruncInst
;; contains an instruction that has more than one use.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; %A1 (zext of %X) feeds both the add -> mul -> trunc chain and a direct call
; to @use64. The expected output performs the add and mul in i32 on %X and
; keeps the zext only for the @use64 call.
define void @multi_uses_add(i32 %X) {
; CHECK-LABEL: @multi_uses_add(
; CHECK-NEXT: [[A1:%.*]] = zext i32 [[X:%.*]] to i64
; CHECK-NEXT: [[B1:%.*]] = add i32 [[X]], 15
; CHECK-NEXT: [[C1:%.*]] = mul i32 [[B1]], [[B1]]
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use32(i32 [[C1]])
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use64(i64 [[A1]])
; CHECK-NEXT: ret void
;
%A1 = zext i32 %X to i64
%B1 = add i64 %A1, 15
%C1 = mul i64 %B1, %B1
%T1 = trunc i64 %C1 to i32
call i32 @use32(i32 %T1)
; Make sure the zext has another use that is not post-dominated by the TruncInst.
call i32 @use64(i64 %A1)
ret void
}
; %A1 (zext of %X) feeds both the or -> mul -> trunc chain and a direct call
; to @use64. The expected output performs the or and mul in i32 on %X and
; keeps the zext only for the @use64 call.
define void @multi_uses_or(i32 %X) {
; CHECK-LABEL: @multi_uses_or(
; CHECK-NEXT: [[A1:%.*]] = zext i32 [[X:%.*]] to i64
; CHECK-NEXT: [[B1:%.*]] = or i32 [[X]], 15
; CHECK-NEXT: [[C1:%.*]] = mul i32 [[B1]], [[B1]]
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use32(i32 [[C1]])
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use64(i64 [[A1]])
; CHECK-NEXT: ret void
;
%A1 = zext i32 %X to i64
%B1 = or i64 %A1, 15
%C1 = mul i64 %B1, %B1
%T1 = trunc i64 %C1 to i32
call i32 @use32(i32 %T1)
; Make sure the zext has another use that is not post-dominated by the TruncInst.
call i32 @use64(i64 %A1)
ret void
}
; %A1 (zext of %X) feeds both the xor -> mul -> trunc chain and a direct call
; to @use64. The expected output performs the xor and mul in i32 on %X and
; keeps the zext only for the @use64 call.
define void @multi_uses_xor(i32 %X) {
; CHECK-LABEL: @multi_uses_xor(
; CHECK-NEXT: [[A1:%.*]] = zext i32 [[X:%.*]] to i64
; CHECK-NEXT: [[B1:%.*]] = xor i32 [[X]], 15
; CHECK-NEXT: [[C1:%.*]] = mul i32 [[B1]], [[B1]]
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use32(i32 [[C1]])
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use64(i64 [[A1]])
; CHECK-NEXT: ret void
;
%A1 = zext i32 %X to i64
%B1 = xor i64 %A1, 15
%C1 = mul i64 %B1, %B1
%T1 = trunc i64 %C1 to i32
call i32 @use32(i32 %T1)
; Make sure the zext has another use that is not post-dominated by the TruncInst.
call i32 @use64(i64 %A1)
ret void
}
; %A1 (zext of %X) feeds both the and -> mul -> trunc chain and a direct call
; to @use64. The expected output performs the and and mul in i32 on %X and
; keeps the zext only for the @use64 call.
define void @multi_uses_and(i32 %X) {
; CHECK-LABEL: @multi_uses_and(
; CHECK-NEXT: [[A1:%.*]] = zext i32 [[X:%.*]] to i64
; CHECK-NEXT: [[B1:%.*]] = and i32 [[X]], 15
; CHECK-NEXT: [[C1:%.*]] = mul i32 [[B1]], [[B1]]
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use32(i32 [[C1]])
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use64(i64 [[A1]])
; CHECK-NEXT: ret void
;
%A1 = zext i32 %X to i64
%B1 = and i64 %A1, 15
%C1 = mul i64 %B1, %B1
%T1 = trunc i64 %C1 to i32
call i32 @use32(i32 %T1)
; Make sure the zext has another use that is not post-dominated by the TruncInst.
call i32 @use64(i64 %A1)
ret void
}
; Two zexts (%A1 of %X, %A2 of %Y) feed a sub -> mul -> trunc chain and are
; each also passed to @use64. The expected output performs the sub and mul in
; i32 on %X and %Y and keeps both zexts only for the @use64 calls.
define void @multi_uses_sub(i32 %X, i32 %Y) {
; CHECK-LABEL: @multi_uses_sub(
; CHECK-NEXT: [[A1:%.*]] = zext i32 [[X:%.*]] to i64
; CHECK-NEXT: [[A2:%.*]] = zext i32 [[Y:%.*]] to i64
; CHECK-NEXT: [[B1:%.*]] = sub i32 [[X]], [[Y]]
; CHECK-NEXT: [[C1:%.*]] = mul i32 [[B1]], [[B1]]
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use32(i32 [[C1]])
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use64(i64 [[A1]])
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @use64(i64 [[A2]])
; CHECK-NEXT: ret void
;
%A1 = zext i32 %X to i64
%A2 = zext i32 %Y to i64
%B1 = sub i64 %A1, %A2
%C1 = mul i64 %B1, %B1
%T1 = trunc i64 %C1 to i32
call i32 @use32(i32 %T1)
; Make sure each zext has another use that is not post-dominated by the TruncInst.
call i32 @use64(i64 %A1)
call i32 @use64(i64 %A2)
ret void
}
; Vector form of the multi-use add test: the <2 x i64> zext feeds both the
; add -> mul -> trunc chain and @use64_vec. The expected output performs the
; add and mul on <2 x i32> and keeps the zext only for the @use64_vec call.
define void @multi_use_vec_add(<2 x i32> %X) {
; CHECK-LABEL: @multi_use_vec_add(
; CHECK-NEXT: [[A1:%.*]] = zext <2 x i32> [[X:%.*]] to <2 x i64>
; CHECK-NEXT: [[B1:%.*]] = add <2 x i32> [[X]], <i32 15, i32 15>
; CHECK-NEXT: [[C1:%.*]] = mul <2 x i32> [[B1]], [[B1]]
; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @use32_vec(<2 x i32> [[C1]])
; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i32> @use64_vec(<2 x i64> [[A1]])
; CHECK-NEXT: ret void
;
%A1 = zext <2 x i32> %X to <2 x i64>
%B1 = add <2 x i64> %A1, <i64 15, i64 15>
%C1 = mul <2 x i64> %B1, %B1
%T1 = trunc <2 x i64> %C1 to <2 x i32>
call <2 x i32> @use32_vec(<2 x i32> %T1)
; Make sure the zext has another use that is not post-dominated by the TruncInst.
call <2 x i32> @use64_vec(<2 x i64> %A1)
ret void
}
; Vector form of the multi-use or test: the <2 x i64> zext feeds both the
; or -> mul -> trunc chain and @use64_vec. The expected output performs the
; or and mul on <2 x i32> and keeps the zext only for the @use64_vec call.
define void @multi_use_vec_or(<2 x i32> %X) {
; CHECK-LABEL: @multi_use_vec_or(
; CHECK-NEXT: [[A1:%.*]] = zext <2 x i32> [[X:%.*]] to <2 x i64>
; CHECK-NEXT: [[B1:%.*]] = or <2 x i32> [[X]], <i32 15, i32 15>
; CHECK-NEXT: [[C1:%.*]] = mul <2 x i32> [[B1]], [[B1]]
; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @use32_vec(<2 x i32> [[C1]])
; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i32> @use64_vec(<2 x i64> [[A1]])
; CHECK-NEXT: ret void
;
%A1 = zext <2 x i32> %X to <2 x i64>
%B1 = or <2 x i64> %A1, <i64 15, i64 15>
%C1 = mul <2 x i64> %B1, %B1
%T1 = trunc <2 x i64> %C1 to <2 x i32>
call <2 x i32> @use32_vec(<2 x i32> %T1)
; Make sure the zext has another use that is not post-dominated by the TruncInst.
call <2 x i32> @use64_vec(<2 x i64> %A1)
ret void
}
; Vector form of the multi-use xor test: the <2 x i64> zext feeds both the
; xor -> mul -> trunc chain and @use64_vec. The expected output performs the
; xor and mul on <2 x i32> and keeps the zext only for the @use64_vec call.
define void @multi_use_vec_xor(<2 x i32> %X) {
; CHECK-LABEL: @multi_use_vec_xor(
; CHECK-NEXT: [[A1:%.*]] = zext <2 x i32> [[X:%.*]] to <2 x i64>
; CHECK-NEXT: [[B1:%.*]] = xor <2 x i32> [[X]], <i32 15, i32 15>
; CHECK-NEXT: [[C1:%.*]] = mul <2 x i32> [[B1]], [[B1]]
; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @use32_vec(<2 x i32> [[C1]])
; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i32> @use64_vec(<2 x i64> [[A1]])
; CHECK-NEXT: ret void
;
%A1 = zext <2 x i32> %X to <2 x i64>
%B1 = xor <2 x i64> %A1, <i64 15, i64 15>
%C1 = mul <2 x i64> %B1, %B1
%T1 = trunc <2 x i64> %C1 to <2 x i32>
call <2 x i32> @use32_vec(<2 x i32> %T1)
; Make sure the zext has another use that is not post-dominated by the TruncInst.
call <2 x i32> @use64_vec(<2 x i64> %A1)
ret void
}
; Vector form of the multi-use and test: the <2 x i64> zext feeds both the
; and -> mul -> trunc chain and @use64_vec. The expected output performs the
; and and mul on <2 x i32> and keeps the zext only for the @use64_vec call.
define void @multi_use_vec_and(<2 x i32> %X) {
; CHECK-LABEL: @multi_use_vec_and(
; CHECK-NEXT: [[A1:%.*]] = zext <2 x i32> [[X:%.*]] to <2 x i64>
; CHECK-NEXT: [[B1:%.*]] = and <2 x i32> [[X]], <i32 15, i32 15>
; CHECK-NEXT: [[C1:%.*]] = mul <2 x i32> [[B1]], [[B1]]
; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @use32_vec(<2 x i32> [[C1]])
; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i32> @use64_vec(<2 x i64> [[A1]])
; CHECK-NEXT: ret void
;
%A1 = zext <2 x i32> %X to <2 x i64>
%B1 = and <2 x i64> %A1, <i64 15, i64 15>
%C1 = mul <2 x i64> %B1, %B1
%T1 = trunc <2 x i64> %C1 to <2 x i32>
call <2 x i32> @use32_vec(<2 x i32> %T1)
; Make sure the zext has another use that is not post-dominated by the TruncInst.
call <2 x i32> @use64_vec(<2 x i64> %A1)
ret void
}
; Vector form of the multi-use sub test: two <2 x i64> zexts feed a
; sub -> mul -> trunc chain and are each also passed to @use64_vec. The
; expected output performs the sub and mul on <2 x i32> and keeps both zexts
; only for the @use64_vec calls.
define void @multi_use_vec_sub(<2 x i32> %X, <2 x i32> %Y) {
; CHECK-LABEL: @multi_use_vec_sub(
; CHECK-NEXT: [[A1:%.*]] = zext <2 x i32> [[X:%.*]] to <2 x i64>
; CHECK-NEXT: [[A2:%.*]] = zext <2 x i32> [[Y:%.*]] to <2 x i64>
; CHECK-NEXT: [[B1:%.*]] = sub <2 x i32> [[X]], [[Y]]
; CHECK-NEXT: [[C1:%.*]] = mul <2 x i32> [[B1]], [[B1]]
; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @use32_vec(<2 x i32> [[C1]])
; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i32> @use64_vec(<2 x i64> [[A1]])
; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @use64_vec(<2 x i64> [[A2]])
; CHECK-NEXT: ret void
;
%A1 = zext <2 x i32> %X to <2 x i64>
%A2 = zext <2 x i32> %Y to <2 x i64>
%B1 = sub <2 x i64> %A1, %A2
%C1 = mul <2 x i64> %B1, %B1
%T1 = trunc <2 x i64> %C1 to <2 x i32>
call <2 x i32> @use32_vec(<2 x i32> %T1)
; Make sure each zext has another use that is not post-dominated by the TruncInst.
call <2 x i32> @use64_vec(<2 x i64> %A1)
call <2 x i32> @use64_vec(<2 x i64> %A2)
ret void
}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; These tests check cases where the expression dag post-dominated by TruncInst
;; contains a TruncInst leaf or ZEXT/SEXT leaves which turn into TruncInst
;; leaves. Check that both expressions are reduced and that no TruncInst
;; remains (or was generated).
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Notice that there are two expression patterns below:
; 1. %T2->%C2->(%B2->(%T1, 15), %B2->(%T1, 15))
; 2. %T1`->%C1->(%B1->(%A1, 15), %B1->(%A1, 15))
; (where %T1` is the reduced node of %T1 into trunc instruction)
define void @trunc_as_a_leaf(i32 %X) {
; CHECK-LABEL: @trunc_as_a_leaf(
; CHECK-NEXT: [[B1:%.*]] = add i32 [[X:%.*]], 15
; CHECK-NEXT: [[C1:%.*]] = mul i32 [[B1]], [[B1]]
; CHECK-NEXT: [[B2:%.*]] = add i32 [[C1]], 15
; CHECK-NEXT: [[C2:%.*]] = mul i32 [[B2]], [[B2]]
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use32(i32 [[C2]])
; CHECK-NEXT: ret void
;
; First chain: computed in i64, then truncated to i48 by %T1.
%A1 = zext i32 %X to i64
%B1 = add i64 %A1, 15
%C1 = mul i64 %B1, %B1
%T1 = trunc i64 %C1 to i48 ; leaf trunc
; Second chain: computed in i48 on top of %T1, then truncated to i32 by %T2.
; The expected output above does all four arithmetic ops in i32 with no
; zext or trunc left.
%B2 = add i48 %T1, 15
%C2 = mul i48 %B2, %B2
%T2 = trunc i48 %C2 to i32
call i32 @use32(i32 %T2)
ret void
}
; Notice that there are two expression patterns below:
; 1. %T2->%C2->(%B2->(%T1, 15), %B2->(%T1, 15))
; 2. %T1`->%C1->(%B1->(%A1, 15), %B1->(%A1, 15))
; (where %T1` is the reduced node of %T1 into trunc instruction)
define void @zext_as_a_leaf(i16 %X) {
; CHECK-LABEL: @zext_as_a_leaf(
; CHECK-NEXT: [[A1:%.*]] = zext i16 [[X:%.*]] to i32
; CHECK-NEXT: [[B1:%.*]] = add i32 [[A1]], 15
; CHECK-NEXT: [[C1:%.*]] = mul i32 [[B1]], [[B1]]
; CHECK-NEXT: [[B2:%.*]] = add i32 [[C1]], 15
; CHECK-NEXT: [[C2:%.*]] = mul i32 [[B2]], [[B2]]
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use32(i32 [[C2]])
; CHECK-NEXT: ret void
;
; First chain: computed in i48 from an i16 input, then widened to i64 by %T1.
%A1 = zext i16 %X to i48
%B1 = add i48 %A1, 15
%C1 = mul i48 %B1, %B1
%T1 = zext i48 %C1 to i64 ; leaf zext, which will turn into trunc
; Second chain: computed in i64 on top of %T1, then truncated to i32 by %T2.
; The expected output above widens %X from i16 straight to i32 and does all
; four arithmetic ops in i32.
%B2 = add i64 %T1, 15
%C2 = mul i64 %B2, %B2
%T2 = trunc i64 %C2 to i32
call i32 @use32(i32 %T2)
ret void
}

View File

@ -0,0 +1,48 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -aggressive-instcombine -S | FileCheck %s
; RUN: opt < %s -passes=aggressive-instcombine -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
; Aggressive Instcombine should be able to ignore unreachable basic blocks.
; The entry block is just `unreachable`, and %for.cond641 is only branched to
; from itself, so it can never be reached from the entry. Inside it, %or722
; uses itself as an operand. The expected output is the input IR unchanged.
define void @func_20() {
; CHECK-LABEL: @func_20(
; CHECK-NEXT: for.body94:
; CHECK-NEXT: unreachable
; CHECK: for.cond641:
; CHECK-NEXT: [[OR722:%.*]] = or i32 [[OR722]], undef
; CHECK-NEXT: [[OR723:%.*]] = or i32 [[OR722]], 1
; CHECK-NEXT: [[CONV724:%.*]] = trunc i32 [[OR723]] to i16
; CHECK-NEXT: br label [[FOR_COND641:%.*]]
;
for.body94:
unreachable
for.cond641:
; Self-referential definition: %or722 is an operand of its own definition.
%or722 = or i32 %or722, undef
%or723 = or i32 %or722, 1
%conv724 = trunc i32 %or723 to i16
br label %for.cond641
}
; Same shape as a self-looping block that the entry never reaches, but here
; %or722 and %a are defined in terms of each other (a two-instruction cycle)
; before feeding the trunc. The expected output is the input IR unchanged.
define void @func_21() {
; CHECK-LABEL: @func_21(
; CHECK-NEXT: for.body94:
; CHECK-NEXT: unreachable
; CHECK: for.cond641:
; CHECK-NEXT: [[OR722:%.*]] = or i32 [[A:%.*]], undef
; CHECK-NEXT: [[A]] = or i32 [[OR722]], undef
; CHECK-NEXT: [[OR723:%.*]] = or i32 [[OR722]], 1
; CHECK-NEXT: [[CONV724:%.*]] = trunc i32 [[OR723]] to i16
; CHECK-NEXT: br label [[FOR_COND641:%.*]]
;
for.body94:
unreachable
for.cond641:
; %or722 uses %a, which is defined on the next line in terms of %or722.
%or722 = or i32 %a, undef
%a = or i32 %or722, undef
%or723 = or i32 %or722, 1
%conv724 = trunc i32 %or723 to i16
br label %for.cond641
}

View File

@ -0,0 +1,216 @@
target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
; RUN: opt < %s -alignment-from-assumptions -S | FileCheck %s
; RUN: opt < %s -passes=alignment-from-assumptions -S | FileCheck %s
; Assumes (%a & 31) == 0, i.e. %a is 32-byte aligned, then loads directly from
; %a with align 4. The expected load alignment is raised to 32.
define i32 @foo(i32* nocapture %a) nounwind uwtable readonly {
entry:
%ptrint = ptrtoint i32* %a to i64
%maskedptr = and i64 %ptrint, 31
%maskcond = icmp eq i64 %maskedptr, 0
tail call void @llvm.assume(i1 %maskcond)
%0 = load i32, i32* %a, align 4
ret i32 %0
; CHECK-LABEL: @foo
; CHECK: load i32, i32* {{[^,]+}}, align 32
; CHECK: ret i32
}
; Assumes ((%a + 24) & 31) == 0, then loads from %a + 2 i32 elements (8 bytes).
; The expected load alignment is 16.
define i32 @foo2(i32* nocapture %a) nounwind uwtable readonly {
entry:
%ptrint = ptrtoint i32* %a to i64
%offsetptr = add i64 %ptrint, 24
%maskedptr = and i64 %offsetptr, 31
%maskcond = icmp eq i64 %maskedptr, 0
tail call void @llvm.assume(i1 %maskcond)
%arrayidx = getelementptr inbounds i32, i32* %a, i64 2
%0 = load i32, i32* %arrayidx, align 4
ret i32 %0
; CHECK-LABEL: @foo2
; CHECK: load i32, i32* {{[^,]+}}, align 16
; CHECK: ret i32
}
; Assumes ((%a + 28) & 31) == 0, then loads from %a - 1 i32 element (-4 bytes),
; which is exactly 32 bytes below the assumed-aligned address. The expected
; load alignment is 32.
define i32 @foo2a(i32* nocapture %a) nounwind uwtable readonly {
entry:
%ptrint = ptrtoint i32* %a to i64
%offsetptr = add i64 %ptrint, 28
%maskedptr = and i64 %offsetptr, 31
%maskcond = icmp eq i64 %maskedptr, 0
tail call void @llvm.assume(i1 %maskcond)
%arrayidx = getelementptr inbounds i32, i32* %a, i64 -1
%0 = load i32, i32* %arrayidx, align 4
ret i32 %0
; CHECK-LABEL: @foo2a
; CHECK: load i32, i32* {{[^,]+}}, align 32
; CHECK: ret i32
}
; Assumes (%a & 31) == 0 and loads directly from %a with align 4. The expected
; load alignment is raised to 32.
define i32 @goo(i32* nocapture %a) nounwind uwtable readonly {
entry:
%ptrint = ptrtoint i32* %a to i64
%maskedptr = and i64 %ptrint, 31
%maskcond = icmp eq i64 %maskedptr, 0
tail call void @llvm.assume(i1 %maskcond)
%0 = load i32, i32* %a, align 4
ret i32 %0
; CHECK-LABEL: @goo
; CHECK: load i32, i32* {{[^,]+}}, align 32
; CHECK: ret i32
}
; Assumes (%a & 31) == 0, then sums a[i] in a loop whose index starts at 0 and
; advances by 8 i32 elements (32 bytes) per iteration. The expected alignment
; of the load in the loop is 32.
define i32 @hoo(i32* nocapture %a) nounwind uwtable readonly {
entry:
%ptrint = ptrtoint i32* %a to i64
%maskedptr = and i64 %ptrint, 31
%maskcond = icmp eq i64 %maskedptr, 0
tail call void @llvm.assume(i1 %maskcond)
br label %for.body
for.body: ; preds = %entry, %for.body
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%r.06 = phi i32 [ 0, %entry ], [ %add, %for.body ]
%arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
%0 = load i32, i32* %arrayidx, align 4
%add = add nsw i32 %0, %r.06
%indvars.iv.next = add i64 %indvars.iv, 8
%1 = trunc i64 %indvars.iv.next to i32
%cmp = icmp slt i32 %1, 2048
br i1 %cmp, label %for.body, label %for.end
for.end: ; preds = %for.body
%add.lcssa = phi i32 [ %add, %for.body ]
ret i32 %add.lcssa
; CHECK-LABEL: @hoo
; CHECK: load i32, i32* %arrayidx, align 32
; CHECK: ret i32 %add.lcssa
}
; Assumes (%a & 31) == 0, then sums a[i] in a loop whose index starts at 4
; (16 bytes) and advances by 8 i32 elements (32 bytes) per iteration. The
; expected alignment of the load in the loop is 16.
define i32 @joo(i32* nocapture %a) nounwind uwtable readonly {
entry:
%ptrint = ptrtoint i32* %a to i64
%maskedptr = and i64 %ptrint, 31
%maskcond = icmp eq i64 %maskedptr, 0
tail call void @llvm.assume(i1 %maskcond)
br label %for.body
for.body: ; preds = %entry, %for.body
%indvars.iv = phi i64 [ 4, %entry ], [ %indvars.iv.next, %for.body ]
%r.06 = phi i32 [ 0, %entry ], [ %add, %for.body ]
%arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
%0 = load i32, i32* %arrayidx, align 4
%add = add nsw i32 %0, %r.06
%indvars.iv.next = add i64 %indvars.iv, 8
%1 = trunc i64 %indvars.iv.next to i32
%cmp = icmp slt i32 %1, 2048
br i1 %cmp, label %for.body, label %for.end
for.end: ; preds = %for.body
%add.lcssa = phi i32 [ %add, %for.body ]
ret i32 %add.lcssa
; CHECK-LABEL: @joo
; CHECK: load i32, i32* %arrayidx, align 16
; CHECK: ret i32 %add.lcssa
}
; Assumes (%a & 31) == 0, then sums a[i] in a loop whose index starts at 0 and
; advances by 4 i32 elements (16 bytes) per iteration. The expected alignment
; of the load in the loop is 16.
define i32 @koo(i32* nocapture %a) nounwind uwtable readonly {
entry:
%ptrint = ptrtoint i32* %a to i64
%maskedptr = and i64 %ptrint, 31
%maskcond = icmp eq i64 %maskedptr, 0
tail call void @llvm.assume(i1 %maskcond)
br label %for.body
for.body: ; preds = %entry, %for.body
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%r.06 = phi i32 [ 0, %entry ], [ %add, %for.body ]
%arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
%0 = load i32, i32* %arrayidx, align 4
%add = add nsw i32 %0, %r.06
%indvars.iv.next = add i64 %indvars.iv, 4
%1 = trunc i64 %indvars.iv.next to i32
%cmp = icmp slt i32 %1, 2048
br i1 %cmp, label %for.body, label %for.end
for.end: ; preds = %for.body
%add.lcssa = phi i32 [ %add, %for.body ]
ret i32 %add.lcssa
; CHECK-LABEL: @koo
; CHECK: load i32, i32* %arrayidx, align 16
; CHECK: ret i32 %add.lcssa
}
; Assumes (%a & 31) == 0, then sums a[i] in a loop whose index starts at -4
; (a negative offset of 16 bytes) and advances by 4 i32 elements (16 bytes)
; per iteration. The expected alignment of the load in the loop is 16.
define i32 @koo2(i32* nocapture %a) nounwind uwtable readonly {
entry:
%ptrint = ptrtoint i32* %a to i64
%maskedptr = and i64 %ptrint, 31
%maskcond = icmp eq i64 %maskedptr, 0
tail call void @llvm.assume(i1 %maskcond)
br label %for.body
for.body: ; preds = %entry, %for.body
%indvars.iv = phi i64 [ -4, %entry ], [ %indvars.iv.next, %for.body ]
%r.06 = phi i32 [ 0, %entry ], [ %add, %for.body ]
%arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
%0 = load i32, i32* %arrayidx, align 4
%add = add nsw i32 %0, %r.06
%indvars.iv.next = add i64 %indvars.iv, 4
%1 = trunc i64 %indvars.iv.next to i32
%cmp = icmp slt i32 %1, 2048
br i1 %cmp, label %for.body, label %for.end
for.end: ; preds = %for.body
%add.lcssa = phi i32 [ %add, %for.body ]
ret i32 %add.lcssa
; CHECK-LABEL: @koo2
; CHECK: load i32, i32* %arrayidx, align 16
; CHECK: ret i32 %add.lcssa
}
; Assumes (%a & 31) == 0, then memsets 64 bytes at %a with a declared
; destination alignment of 4. The expected destination alignment is 32.
define i32 @moo(i32* nocapture %a) nounwind uwtable {
entry:
%ptrint = ptrtoint i32* %a to i64
%maskedptr = and i64 %ptrint, 31
%maskcond = icmp eq i64 %maskedptr, 0
tail call void @llvm.assume(i1 %maskcond)
%0 = bitcast i32* %a to i8*
tail call void @llvm.memset.p0i8.i64(i8* align 4 %0, i8 0, i64 64, i1 false)
ret i32 undef
; CHECK-LABEL: @moo
; CHECK: @llvm.memset.p0i8.i64(i8* align 32 %0, i8 0, i64 64, i1 false)
; CHECK: ret i32 undef
}
; Two independent assumptions: (%a & 31) == 0 and (%b & 127) == 0. The memcpy
; from %b to %a declares align 4 on both pointers; the expected output uses
; align 32 for the destination and align 128 for the source.
define i32 @moo2(i32* nocapture %a, i32* nocapture %b) nounwind uwtable {
entry:
%ptrint = ptrtoint i32* %a to i64
%maskedptr = and i64 %ptrint, 31
%maskcond = icmp eq i64 %maskedptr, 0
tail call void @llvm.assume(i1 %maskcond)
%ptrint1 = ptrtoint i32* %b to i64
%maskedptr3 = and i64 %ptrint1, 127
%maskcond4 = icmp eq i64 %maskedptr3, 0
tail call void @llvm.assume(i1 %maskcond4)
%0 = bitcast i32* %a to i8*
%1 = bitcast i32* %b to i8*
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 %1, i64 64, i1 false)
ret i32 undef
; CHECK-LABEL: @moo2
; CHECK: @llvm.memcpy.p0i8.p0i8.i64(i8* align 32 %0, i8* align 128 %1, i64 64, i1 false)
; CHECK: ret i32 undef
}
declare void @llvm.assume(i1) nounwind
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind

View File

@ -0,0 +1,216 @@
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
; RUN: opt < %s -alignment-from-assumptions -S | FileCheck %s
; RUN: opt < %s -passes=alignment-from-assumptions -S | FileCheck %s
; Assumes (%a & 31) == 0, i.e. %a is 32-byte aligned, then loads directly from
; %a with align 4. The expected load alignment is raised to 32.
define i32 @foo(i32* nocapture %a) nounwind uwtable readonly {
entry:
%ptrint = ptrtoint i32* %a to i64
%maskedptr = and i64 %ptrint, 31
%maskcond = icmp eq i64 %maskedptr, 0
tail call void @llvm.assume(i1 %maskcond)
%0 = load i32, i32* %a, align 4
ret i32 %0
; CHECK-LABEL: @foo
; CHECK: load i32, i32* {{[^,]+}}, align 32
; CHECK: ret i32
}
; Assumes ((%a + 24) & 31) == 0, then loads from %a + 2 i32 elements (8 bytes).
; The expected load alignment is 16.
define i32 @foo2(i32* nocapture %a) nounwind uwtable readonly {
entry:
%ptrint = ptrtoint i32* %a to i64
%offsetptr = add i64 %ptrint, 24
%maskedptr = and i64 %offsetptr, 31
%maskcond = icmp eq i64 %maskedptr, 0
tail call void @llvm.assume(i1 %maskcond)
%arrayidx = getelementptr inbounds i32, i32* %a, i64 2
%0 = load i32, i32* %arrayidx, align 4
ret i32 %0
; CHECK-LABEL: @foo2
; CHECK: load i32, i32* {{[^,]+}}, align 16
; CHECK: ret i32
}
; Assumes ((%a + 28) & 31) == 0, then loads from %a - 1 i32 element (-4 bytes),
; which is exactly 32 bytes below the assumed-aligned address. The expected
; load alignment is 32.
define i32 @foo2a(i32* nocapture %a) nounwind uwtable readonly {
entry:
%ptrint = ptrtoint i32* %a to i64
%offsetptr = add i64 %ptrint, 28
%maskedptr = and i64 %offsetptr, 31
%maskcond = icmp eq i64 %maskedptr, 0
tail call void @llvm.assume(i1 %maskcond)
%arrayidx = getelementptr inbounds i32, i32* %a, i64 -1
%0 = load i32, i32* %arrayidx, align 4
ret i32 %0
; CHECK-LABEL: @foo2a
; CHECK: load i32, i32* {{[^,]+}}, align 32
; CHECK: ret i32
}
; Assumes (%a & 31) == 0 and loads directly from %a with align 4. The expected
; load alignment is raised to 32.
define i32 @goo(i32* nocapture %a) nounwind uwtable readonly {
entry:
%ptrint = ptrtoint i32* %a to i64
%maskedptr = and i64 %ptrint, 31
%maskcond = icmp eq i64 %maskedptr, 0
tail call void @llvm.assume(i1 %maskcond)
%0 = load i32, i32* %a, align 4
ret i32 %0
; CHECK-LABEL: @goo
; CHECK: load i32, i32* {{[^,]+}}, align 32
; CHECK: ret i32
}
; Assumes (%a & 31) == 0, then sums a[i] in a loop whose index starts at 0 and
; advances by 8 i32 elements (32 bytes) per iteration. The expected alignment
; of the load in the loop is 32.
define i32 @hoo(i32* nocapture %a) nounwind uwtable readonly {
entry:
%ptrint = ptrtoint i32* %a to i64
%maskedptr = and i64 %ptrint, 31
%maskcond = icmp eq i64 %maskedptr, 0
tail call void @llvm.assume(i1 %maskcond)
br label %for.body
for.body: ; preds = %entry, %for.body
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%r.06 = phi i32 [ 0, %entry ], [ %add, %for.body ]
%arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
%0 = load i32, i32* %arrayidx, align 4
%add = add nsw i32 %0, %r.06
%indvars.iv.next = add i64 %indvars.iv, 8
%1 = trunc i64 %indvars.iv.next to i32
%cmp = icmp slt i32 %1, 2048
br i1 %cmp, label %for.body, label %for.end
for.end: ; preds = %for.body
%add.lcssa = phi i32 [ %add, %for.body ]
ret i32 %add.lcssa
; CHECK-LABEL: @hoo
; CHECK: load i32, i32* %arrayidx, align 32
; CHECK: ret i32 %add.lcssa
}
; Assumes (%a & 31) == 0, then sums a[i] in a loop whose index starts at 4
; (16 bytes) and advances by 8 i32 elements (32 bytes) per iteration. The
; expected alignment of the load in the loop is 16.
define i32 @joo(i32* nocapture %a) nounwind uwtable readonly {
entry:
%ptrint = ptrtoint i32* %a to i64
%maskedptr = and i64 %ptrint, 31
%maskcond = icmp eq i64 %maskedptr, 0
tail call void @llvm.assume(i1 %maskcond)
br label %for.body
for.body: ; preds = %entry, %for.body
%indvars.iv = phi i64 [ 4, %entry ], [ %indvars.iv.next, %for.body ]
%r.06 = phi i32 [ 0, %entry ], [ %add, %for.body ]
%arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
%0 = load i32, i32* %arrayidx, align 4
%add = add nsw i32 %0, %r.06
%indvars.iv.next = add i64 %indvars.iv, 8
%1 = trunc i64 %indvars.iv.next to i32
%cmp = icmp slt i32 %1, 2048
br i1 %cmp, label %for.body, label %for.end
for.end: ; preds = %for.body
%add.lcssa = phi i32 [ %add, %for.body ]
ret i32 %add.lcssa
; CHECK-LABEL: @joo
; CHECK: load i32, i32* %arrayidx, align 16
; CHECK: ret i32 %add.lcssa
}
; Assumes (%a & 31) == 0, then sums a[i] in a loop whose index starts at 0 and
; advances by 4 i32 elements (16 bytes) per iteration. The expected alignment
; of the load in the loop is 16.
define i32 @koo(i32* nocapture %a) nounwind uwtable readonly {
entry:
%ptrint = ptrtoint i32* %a to i64
%maskedptr = and i64 %ptrint, 31
%maskcond = icmp eq i64 %maskedptr, 0
tail call void @llvm.assume(i1 %maskcond)
br label %for.body
for.body: ; preds = %entry, %for.body
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%r.06 = phi i32 [ 0, %entry ], [ %add, %for.body ]
%arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
%0 = load i32, i32* %arrayidx, align 4
%add = add nsw i32 %0, %r.06
%indvars.iv.next = add i64 %indvars.iv, 4
%1 = trunc i64 %indvars.iv.next to i32
%cmp = icmp slt i32 %1, 2048
br i1 %cmp, label %for.body, label %for.end
for.end: ; preds = %for.body
%add.lcssa = phi i32 [ %add, %for.body ]
ret i32 %add.lcssa
; CHECK-LABEL: @koo
; CHECK: load i32, i32* %arrayidx, align 16
; CHECK: ret i32 %add.lcssa
}
; Assumes (%a & 31) == 0, then sums a[i] in a loop whose index starts at -4
; (a negative offset of 16 bytes) and advances by 4 i32 elements (16 bytes)
; per iteration. The expected alignment of the load in the loop is 16.
define i32 @koo2(i32* nocapture %a) nounwind uwtable readonly {
entry:
%ptrint = ptrtoint i32* %a to i64
%maskedptr = and i64 %ptrint, 31
%maskcond = icmp eq i64 %maskedptr, 0
tail call void @llvm.assume(i1 %maskcond)
br label %for.body
for.body: ; preds = %entry, %for.body
%indvars.iv = phi i64 [ -4, %entry ], [ %indvars.iv.next, %for.body ]
%r.06 = phi i32 [ 0, %entry ], [ %add, %for.body ]
%arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
%0 = load i32, i32* %arrayidx, align 4
%add = add nsw i32 %0, %r.06
%indvars.iv.next = add i64 %indvars.iv, 4
%1 = trunc i64 %indvars.iv.next to i32
%cmp = icmp slt i32 %1, 2048
br i1 %cmp, label %for.body, label %for.end
for.end: ; preds = %for.body
%add.lcssa = phi i32 [ %add, %for.body ]
ret i32 %add.lcssa
; CHECK-LABEL: @koo2
; CHECK: load i32, i32* %arrayidx, align 16
; CHECK: ret i32 %add.lcssa
}
; Assumes (%a & 31) == 0, then memsets 64 bytes at %a with a declared
; destination alignment of 4. The expected destination alignment is 32.
define i32 @moo(i32* nocapture %a) nounwind uwtable {
entry:
%ptrint = ptrtoint i32* %a to i64
%maskedptr = and i64 %ptrint, 31
%maskcond = icmp eq i64 %maskedptr, 0
tail call void @llvm.assume(i1 %maskcond)
%0 = bitcast i32* %a to i8*
tail call void @llvm.memset.p0i8.i64(i8* align 4 %0, i8 0, i64 64, i1 false)
ret i32 undef
; CHECK-LABEL: @moo
; CHECK: @llvm.memset.p0i8.i64(i8* align 32 %0, i8 0, i64 64, i1 false)
; CHECK: ret i32 undef
}
; Two independent assumptions: (%a & 31) == 0 and (%b & 127) == 0. The memcpy
; from %b to %a declares align 4 on both pointers; the expected output uses
; align 32 for the destination and align 128 for the source.
define i32 @moo2(i32* nocapture %a, i32* nocapture %b) nounwind uwtable {
entry:
%ptrint = ptrtoint i32* %a to i64
%maskedptr = and i64 %ptrint, 31
%maskcond = icmp eq i64 %maskedptr, 0
tail call void @llvm.assume(i1 %maskcond)
%ptrint1 = ptrtoint i32* %b to i64
%maskedptr3 = and i64 %ptrint1, 127
%maskcond4 = icmp eq i64 %maskedptr3, 0
tail call void @llvm.assume(i1 %maskcond4)
%0 = bitcast i32* %a to i8*
%1 = bitcast i32* %b to i8*
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 %1, i64 64, i1 false)
ret i32 undef
; CHECK-LABEL: @moo2
; CHECK: @llvm.memcpy.p0i8.p0i8.i64(i8* align 32 %0, i8* align 128 %1, i64 64, i1 false)
; CHECK: ret i32 undef
}
declare void @llvm.assume(i1) nounwind
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind

View File

@ -0,0 +1,155 @@
; RUN: opt -alignment-from-assumptions -S < %s | FileCheck %s
; RUN: opt -passes=alignment-from-assumptions -S < %s | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
%type1 = type { %type2 }
%type2 = type { [4 x i8] }
; Function Attrs: nounwind
declare void @llvm.assume(i1) #0
; Function Attrs: nounwind readnone
declare i32 @llvm.bswap.i32(i32) #1
; Function Attrs: nounwind uwtable
; Large control-flow skeleton in which almost every branch condition is undef.
; The only alignment assumption (in %if.then126) masks an undef pointer with 1,
; i.e. asserts 2-byte alignment. Both loads in the function already carry
; align 2, and the expected output keeps align 2 on each of them.
define void @test1() unnamed_addr #2 align 2 {
; CHECK-LABEL: @test1
entry:
br i1 undef, label %if.then, label %if.end
if.then: ; preds = %entry
unreachable
if.end: ; preds = %entry
br i1 undef, label %return, label %if.end8
if.end8: ; preds = %if.end
br i1 undef, label %if.then13, label %if.end14
if.then13: ; preds = %if.end8
unreachable
if.end14: ; preds = %if.end8
br i1 undef, label %cond.false.i129, label %cond.end.i136
cond.false.i129: ; preds = %if.end14
unreachable
cond.end.i136: ; preds = %if.end14
br i1 undef, label %land.lhs.true.i, label %if.end.i145
land.lhs.true.i: ; preds = %cond.end.i136
br i1 undef, label %if.end.i145, label %if.then.i137
if.then.i137: ; preds = %land.lhs.true.i
br i1 undef, label %cond.false8.i, label %cond.end9.i
cond.false8.i: ; preds = %if.then.i137
unreachable
cond.end9.i: ; preds = %if.then.i137
br i1 undef, label %if.then23, label %if.end24
if.end.i145: ; preds = %land.lhs.true.i, %cond.end.i136
unreachable
if.then23: ; preds = %cond.end9.i
unreachable
if.end24: ; preds = %cond.end9.i
br i1 undef, label %for.end, label %for.body.lr.ph
for.body.lr.ph: ; preds = %if.end24
unreachable
for.end: ; preds = %if.end24
br i1 undef, label %if.end123, label %if.then121
if.then121: ; preds = %for.end
unreachable
if.end123: ; preds = %for.end
br i1 undef, label %if.end150, label %if.then126
if.then126: ; preds = %if.end123
; Alignment assumption on an undef pointer: (ptr & 1) == 0.
%ptrint.i.i185 = ptrtoint %type1* undef to i64
%maskedptr.i.i186 = and i64 %ptrint.i.i185, 1
%maskcond.i.i187 = icmp eq i64 %maskedptr.i.i186, 0
tail call void @llvm.assume(i1 %maskcond.i.i187) #0
%ret.0..sroa_cast.i.i188 = bitcast %type1* undef to i32*
%ret.0.copyload.i.i189 = load i32, i32* %ret.0..sroa_cast.i.i188, align 2
; CHECK: load {{.*}} align 2
; The byte-swapped loaded value becomes an offset from an undef base pointer;
; the resulting pointer %1 seeds the phi in %for.body137.
%0 = tail call i32 @llvm.bswap.i32(i32 %ret.0.copyload.i.i189) #0
%conv131 = zext i32 %0 to i64
%add.ptr132 = getelementptr inbounds i8, i8* undef, i64 %conv131
%1 = bitcast i8* %add.ptr132 to %type1*
br i1 undef, label %if.end150, label %if.end.i173
if.end.i173: ; preds = %if.then126
br i1 undef, label %test1.exit, label %cond.false.i.i.i.i174
cond.false.i.i.i.i174: ; preds = %if.end.i173
unreachable
test1.exit: ; preds = %if.end.i173
br i1 undef, label %test1a.exit, label %if.end.i124
if.end.i124: ; preds = %test1.exit
unreachable
test1a.exit: ; preds = %test1.exit
br i1 undef, label %if.end150, label %for.body137.lr.ph
for.body137.lr.ph: ; preds = %test1a.exit
br label %for.body137
for.body137: ; preds = %test1b.exit, %for.body137.lr.ph
; Loop-carried pointer: starts at %1 and advances by one %type1 per iteration.
%ShndxTable.0309 = phi %type1* [ %1, %for.body137.lr.ph ], [ %incdec.ptr, %test1b.exit ]
%ret.0..sroa_cast.i.i106 = bitcast %type1* %ShndxTable.0309 to i32*
br i1 undef, label %for.body137.if.end146_crit_edge, label %if.then140
for.body137.if.end146_crit_edge: ; preds = %for.body137
%incdec.ptr = getelementptr inbounds %type1, %type1* %ShndxTable.0309, i64 1
br i1 undef, label %cond.false.i70, label %cond.end.i
if.then140: ; preds = %for.body137
; Load through the loop-carried pointer; expected to stay at align 2.
%ret.0.copyload.i.i102 = load i32, i32* %ret.0..sroa_cast.i.i106, align 2
; CHECK: load {{.*}} align 2
unreachable
cond.false.i70: ; preds = %for.body137.if.end146_crit_edge
unreachable
cond.end.i: ; preds = %for.body137.if.end146_crit_edge
br i1 undef, label %test1b.exit, label %cond.false.i.i
cond.false.i.i: ; preds = %cond.end.i
unreachable
test1b.exit: ; preds = %cond.end.i
br i1 undef, label %if.end150, label %for.body137
if.end150: ; preds = %test1b.exit, %test1a.exit, %if.then126, %if.end123
br i1 undef, label %for.end176, label %for.body155.lr.ph
for.body155.lr.ph: ; preds = %if.end150
unreachable
for.end176: ; preds = %if.end150
unreachable
return: ; preds = %if.end
ret void
}
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind uwtable }

View File

@ -0,0 +1,19 @@
; RUN: opt < %s -argpromotion -S | FileCheck %s
; CHECK: define internal i32 @deref(i32 %x.val) #0 {
; Internal callee that only loads through its pointer argument %x. The
; FileCheck line above expects the rewritten signature to take the loaded
; value (i32 %x.val) instead of the pointer.
define internal i32 @deref(i32* %x) nounwind {
entry:
%tmp2 = load i32, i32* %x, align 4
ret i32 %tmp2
}
; Caller: spills %x to a stack slot and passes the slot's address to @deref.
; The expected output passes the loaded value and keeps a nounwind attribute
; group on the call site.
define i32 @f(i32 %x) {
entry:
%x_addr = alloca i32
store i32 %x, i32* %x_addr, align 4
; CHECK: %tmp1 = call i32 @deref(i32 %x_addr.val) [[NUW:#[0-9]+]]
%tmp1 = call i32 @deref( i32* %x_addr ) nounwind
ret i32 %tmp1
}
; CHECK: attributes [[NUW]] = { nounwind }

View File

@ -0,0 +1,30 @@
; RUN: opt < %s -argpromotion -S | FileCheck %s
; PR2498
; This test tries to convince argpromotion about promoting the load from %A + 2,
; because there is a load of %A in the entry block
; Callee under test: %A[0] is loaded in the entry block on every path, while
; %A[2] is loaded only on the %F path. The expected signature keeps %A as a
; pointer.
define internal i32 @callee(i1 %C, i32* %A) {
; CHECK-LABEL: define internal i32 @callee(
; CHECK: i1 %C, i32* %A)
entry:
; Unconditionally load the element at %A
%A.0 = load i32, i32* %A
br i1 %C, label %T, label %F
T:
ret i32 %A.0
F:
; Load the element at offset two from %A. This should not be promoted!
%A.2 = getelementptr i32, i32* %A, i32 2
%R = load i32, i32* %A.2
ret i32 %R
}
; Caller passes a constant false condition and a null pointer; the expected
; output leaves this call untouched.
define i32 @foo() {
; CHECK-LABEL: define i32 @foo
%X = call i32 @callee(i1 false, i32* null) ; <i32> [#uses=1]
; CHECK: call i32 @callee(i1 false, i32* null)
ret i32 %X
}

View File

@ -0,0 +1,12 @@
; RUN: opt < %s -inline -argpromotion -disable-output
; Internal fastcc callee whose whole body is `unreachable`; neither argument
; is read.
define internal fastcc i32 @hash(i32* %ts, i32 %mod) nounwind {
entry:
unreachable
}
; Calls @hash, ignores the result, then hits `unreachable`. The test discards
; opt's output, so it only requires that the pass pipeline completes.
define void @encode(i32* %m, i32* %ts, i32* %new) nounwind {
entry:
%0 = call fastcc i32 @hash( i32* %ts, i32 0 ) nounwind ; <i32> [#uses=0]
unreachable
}

View File

@ -0,0 +1,25 @@
; RUN: opt < %s -argpromotion -disable-output
; Self-recursive internal function. The recursive call sits in %bb, which is
; guarded by a constant-false branch, and passes null for %Term while
; forwarding %List.
define internal fastcc i32 @term_SharingList(i32* %Term, i32* %List) nounwind {
entry:
br i1 false, label %bb, label %bb5
bb: ; preds = %entry
%0 = call fastcc i32 @term_SharingList( i32* null, i32* %List ) nounwind ; <i32> [#uses=0]
unreachable
bb5: ; preds = %entry
ret i32 0
}
; External caller of @term_SharingList; the call passes two null pointers and
; lives in a block guarded by a constant-false branch.
define i32 @term_Sharing(i32* %Term) nounwind {
entry:
br i1 false, label %bb.i, label %bb14
bb.i: ; preds = %entry
%0 = call fastcc i32 @term_SharingList( i32* null, i32* null ) nounwind ; <i32> [#uses=0]
ret i32 1
bb14: ; preds = %entry
ret i32 0
}

View File

@ -0,0 +1,53 @@
; RUN: opt -S -argpromotion < %s | FileCheck %s
; RUN: opt -S -passes=argpromotion < %s | FileCheck %s
; Test that we only promote arguments when the caller/callee have compatible
; function attributes.
target triple = "x86_64-unknown-linux-gnu"
; CHECK-LABEL: @no_promote_avx2(<4 x i64>* %arg, <4 x i64>* readonly %arg1)
; Callee in attribute group #0 (+avx2): copies the vector loaded from %arg1
; into %arg. The FileCheck line above expects %arg1 to stay a pointer.
define internal fastcc void @no_promote_avx2(<4 x i64>* %arg, <4 x i64>* readonly %arg1) #0 {
bb:
%tmp = load <4 x i64>, <4 x i64>* %arg1
store <4 x i64> %tmp, <4 x i64>* %arg
ret void
}
; Caller in attribute group #1, which carries no "target-features" string and
; therefore differs from the +avx2 callee it invokes.
define void @no_promote(<4 x i64>* %arg) #1 {
bb:
%tmp = alloca <4 x i64>, align 32
%tmp2 = alloca <4 x i64>, align 32
%tmp3 = bitcast <4 x i64>* %tmp to i8*
; Zero the 32-byte source vector before handing it to the callee.
call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
call fastcc void @no_promote_avx2(<4 x i64>* %tmp2, <4 x i64>* %tmp)
%tmp4 = load <4 x i64>, <4 x i64>* %tmp2, align 32
store <4 x i64> %tmp4, <4 x i64>* %arg, align 2
ret void
}
; CHECK-LABEL: @promote_avx2(<4 x i64>* %arg, <4 x i64> %
; Same body as @no_promote_avx2, but here the caller shares attribute group
; #0. The FileCheck line above expects the second parameter to become a
; by-value <4 x i64>.
define internal fastcc void @promote_avx2(<4 x i64>* %arg, <4 x i64>* readonly %arg1) #0 {
bb:
%tmp = load <4 x i64>, <4 x i64>* %arg1
store <4 x i64> %tmp, <4 x i64>* %arg
ret void
}
; Caller in attribute group #0, the same group as @promote_avx2.
define void @promote(<4 x i64>* %arg) #0 {
bb:
%tmp = alloca <4 x i64>, align 32
%tmp2 = alloca <4 x i64>, align 32
%tmp3 = bitcast <4 x i64>* %tmp to i8*
; Zero the 32-byte source vector before handing it to the callee.
call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
call fastcc void @promote_avx2(<4 x i64>* %tmp2, <4 x i64>* %tmp)
%tmp4 = load <4 x i64>, <4 x i64>* %tmp2, align 32
store <4 x i64> %tmp4, <4 x i64>* %arg, align 2
ret void
}
; Function Attrs: argmemonly nounwind
declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) #2
attributes #0 = { inlinehint norecurse nounwind uwtable "target-features"="+avx2" }
attributes #1 = { nounwind uwtable }
attributes #2 = { argmemonly nounwind }

View File

@ -0,0 +1,3 @@
# Mark these tests unsupported when 'X86' is not among the configured targets.
if 'X86' not in config.root.targets:
    config.unsupported = True

View File

@ -0,0 +1,184 @@
; RUN: opt -S -argpromotion < %s | FileCheck %s
; RUN: opt -S -passes=argpromotion < %s | FileCheck %s
; Test that we only promote arguments when the caller/callee have compatible
; function attributes.
target triple = "x86_64-unknown-linux-gnu"
; This should promote
; CHECK-LABEL: @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* %arg, <8 x i64> %arg1.val)
; Callee in attribute group #0 (+avx512vl, min-legal-vector-width=512,
; prefer-vector-width=512); its caller below uses the same group.
define internal fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #0 {
bb:
%tmp = load <8 x i64>, <8 x i64>* %arg1
store <8 x i64> %tmp, <8 x i64>* %arg
ret void
}
; Caller in attribute group #0, matching its callee exactly.
define void @avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* %arg) #0 {
bb:
%tmp = alloca <8 x i64>, align 32
%tmp2 = alloca <8 x i64>, align 32
%tmp3 = bitcast <8 x i64>* %tmp to i8*
; Writes 32 zero bytes at the start of %tmp before the call.
call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* %tmp2, <8 x i64>* %tmp)
%tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32
store <8 x i64> %tmp4, <8 x i64>* %arg, align 2
ret void
}
; This should promote
; CHECK-LABEL: @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg, <8 x i64> %arg1.val)
; Callee in attribute group #1 (+avx512vl, min-legal-vector-width=512,
; prefer-vector-width=256); its caller below uses the same group.
define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #1 {
bb:
%tmp = load <8 x i64>, <8 x i64>* %arg1
store <8 x i64> %tmp, <8 x i64>* %arg
ret void
}
; Caller in attribute group #1, matching its callee exactly.
define void @avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg) #1 {
bb:
%tmp = alloca <8 x i64>, align 32
%tmp2 = alloca <8 x i64>, align 32
%tmp3 = bitcast <8 x i64>* %tmp to i8*
; Writes 32 zero bytes at the start of %tmp before the call.
call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %tmp2, <8 x i64>* %tmp)
%tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32
store <8 x i64> %tmp4, <8 x i64>* %arg, align 2
ret void
}
; This should promote
; CHECK-LABEL: @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* %arg, <8 x i64> %arg1.val)
; Callee in attribute group #1 (prefer-vector-width=256); its caller below is
; in group #0 (prefer-vector-width=512). Both groups have
; min-legal-vector-width=512.
define internal fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #1 {
bb:
%tmp = load <8 x i64>, <8 x i64>* %arg1
store <8 x i64> %tmp, <8 x i64>* %arg
ret void
}
; Caller in attribute group #0 (prefer-vector-width=512) invoking a callee in
; group #1 (prefer-vector-width=256).
define void @avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* %arg) #0 {
bb:
%tmp = alloca <8 x i64>, align 32
%tmp2 = alloca <8 x i64>, align 32
%tmp3 = bitcast <8 x i64>* %tmp to i8*
; Writes 32 zero bytes at the start of %tmp before the call.
call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* %tmp2, <8 x i64>* %tmp)
%tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32
store <8 x i64> %tmp4, <8 x i64>* %arg, align 2
ret void
}
; This should promote
; CHECK-LABEL: @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* %arg, <8 x i64> %arg1.val)
; Callee in attribute group #0 (prefer-vector-width=512); its caller below is
; in group #1 (prefer-vector-width=256). Both groups have
; min-legal-vector-width=512.
define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #0 {
bb:
%tmp = load <8 x i64>, <8 x i64>* %arg1
store <8 x i64> %tmp, <8 x i64>* %arg
ret void
}
; Caller in attribute group #1 (prefer-vector-width=256) invoking a callee in
; group #0 (prefer-vector-width=512).
define void @avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* %arg) #1 {
bb:
%tmp = alloca <8 x i64>, align 32
%tmp2 = alloca <8 x i64>, align 32
%tmp3 = bitcast <8 x i64>* %tmp to i8*
; Writes 32 zero bytes at the start of %tmp before the call.
call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* %tmp2, <8 x i64>* %tmp)
%tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32
store <8 x i64> %tmp4, <8 x i64>* %arg, align 2
ret void
}
; This should not promote
; CHECK-LABEL: @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1)
; Callee in attribute group #1 (min-legal-vector-width=512); its caller below
; is in group #2 (min-legal-vector-width=256). The FileCheck line above
; expects %arg1 to stay a pointer.
define internal fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #1 {
bb:
%tmp = load <8 x i64>, <8 x i64>* %arg1
store <8 x i64> %tmp, <8 x i64>* %arg
ret void
}
; Caller in attribute group #2 (min-legal-vector-width=256) invoking a callee
; in group #1 (min-legal-vector-width=512).
define void @avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg) #2 {
bb:
%tmp = alloca <8 x i64>, align 32
%tmp2 = alloca <8 x i64>, align 32
%tmp3 = bitcast <8 x i64>* %tmp to i8*
; Writes 32 zero bytes at the start of %tmp before the call.
call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %tmp2, <8 x i64>* %tmp)
%tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32
store <8 x i64> %tmp4, <8 x i64>* %arg, align 2
ret void
}
; This should not promote
; CHECK-LABEL: @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1)
; Callee in attribute group #2 (min-legal-vector-width=256); its caller below
; is in group #1 (min-legal-vector-width=512). The FileCheck line above
; expects %arg1 to stay a pointer.
define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #2 {
bb:
%tmp = load <8 x i64>, <8 x i64>* %arg1
store <8 x i64> %tmp, <8 x i64>* %arg
ret void
}
; Caller in attribute group #1 (min-legal-vector-width=512) invoking a callee
; in group #2 (min-legal-vector-width=256).
define void @avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* %arg) #1 {
bb:
%tmp = alloca <8 x i64>, align 32
%tmp2 = alloca <8 x i64>, align 32
%tmp3 = bitcast <8 x i64>* %tmp to i8*
; Writes 32 zero bytes at the start of %tmp before the call.
call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* %tmp2, <8 x i64>* %tmp)
%tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32
store <8 x i64> %tmp4, <8 x i64>* %arg, align 2
ret void
}
; This should promote
; CHECK-LABEL: @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %arg, <8 x i64> %arg1.val)
; Callee in attribute group #3 (+avx2, min-legal-vector-width=512); its caller
; below is in group #4 (+avx2, min-legal-vector-width=256). The FileCheck line
; above expects %arg1 to be passed by value.
define internal fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #3 {
bb:
%tmp = load <8 x i64>, <8 x i64>* %arg1
store <8 x i64> %tmp, <8 x i64>* %arg
ret void
}
; Caller in attribute group #4 (+avx2, min-legal-vector-width=256) invoking a
; callee in group #3 (+avx2, min-legal-vector-width=512).
define void @avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %arg) #4 {
bb:
%tmp = alloca <8 x i64>, align 32
%tmp2 = alloca <8 x i64>, align 32
%tmp3 = bitcast <8 x i64>* %tmp to i8*
; Writes 32 zero bytes at the start of %tmp before the call.
call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %tmp2, <8 x i64>* %tmp)
%tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32
store <8 x i64> %tmp4, <8 x i64>* %arg, align 2
ret void
}
; This should promote
; CHECK-LABEL: @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %arg, <8 x i64> %arg1.val)
; Callee in attribute group #4 (+avx2, min-legal-vector-width=256); its caller
; below is in group #3 (+avx2, min-legal-vector-width=512). The FileCheck line
; above expects %arg1 to be passed by value.
define internal fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #4 {
bb:
%tmp = load <8 x i64>, <8 x i64>* %arg1
store <8 x i64> %tmp, <8 x i64>* %arg
ret void
}
; Caller in attribute group #3 (+avx2, min-legal-vector-width=512) invoking a
; callee in group #4 (+avx2, min-legal-vector-width=256).
define void @avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %arg) #3 {
bb:
%tmp = alloca <8 x i64>, align 32
%tmp2 = alloca <8 x i64>, align 32
%tmp3 = bitcast <8 x i64>* %tmp to i8*
; Writes 32 zero bytes at the start of %tmp before the call.
call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %tmp2, <8 x i64>* %tmp)
%tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32
store <8 x i64> %tmp4, <8 x i64>* %arg, align 2
ret void
}
; Function Attrs: argmemonly nounwind
declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) #5
attributes #0 = { inlinehint norecurse nounwind uwtable "target-features"="+avx512vl" "min-legal-vector-width"="512" "prefer-vector-width"="512" }
attributes #1 = { inlinehint norecurse nounwind uwtable "target-features"="+avx512vl" "min-legal-vector-width"="512" "prefer-vector-width"="256" }
attributes #2 = { inlinehint norecurse nounwind uwtable "target-features"="+avx512vl" "min-legal-vector-width"="256" "prefer-vector-width"="256" }
attributes #3 = { inlinehint norecurse nounwind uwtable "target-features"="+avx2" "min-legal-vector-width"="512" "prefer-vector-width"="256" }
attributes #4 = { inlinehint norecurse nounwind uwtable "target-features"="+avx2" "min-legal-vector-width"="256" "prefer-vector-width"="256" }
attributes #5 = { argmemonly nounwind }

View File

@ -0,0 +1,31 @@
; RUN: opt < %s -argpromotion -S | FileCheck %s
; RUN: opt < %s -passes=argpromotion -S | FileCheck %s
%T = type { i32, i32, i32, i32 }
@G = constant %T { i32 0, i32 0, i32 17, i32 25 }
; Reads fields 3 and 2 of the %T struct behind %p and returns their sum. The
; expected signature takes two i32 values and the body contains no loads.
define internal i32 @test(%T* %p) {
; CHECK-LABEL: define internal i32 @test(
; CHECK: i32 %{{.*}}, i32 %{{.*}})
entry:
%a.gep = getelementptr %T, %T* %p, i64 0, i32 3
%b.gep = getelementptr %T, %T* %p, i64 0, i32 2
%a = load i32, i32* %a.gep
%b = load i32, i32* %b.gep
; CHECK-NOT: load
%v = add i32 %a, %b
ret i32 %v
; CHECK: ret i32
}
; Caller passes the constant global @G. The expected output loads field 2 and
; then field 3 of @G here and passes them in that order.
define i32 @caller() {
; CHECK-LABEL: define i32 @caller(
entry:
%v = call i32 @test(%T* @G)
; CHECK: %[[B_GEP:.*]] = getelementptr %T, %T* @G, i64 0, i32 2
; CHECK: %[[B:.*]] = load i32, i32* %[[B_GEP]]
; CHECK: %[[A_GEP:.*]] = getelementptr %T, %T* @G, i64 0, i32 3
; CHECK: %[[A:.*]] = load i32, i32* %[[A_GEP]]
; CHECK: call i32 @test(i32 %[[B]], i32 %[[A]])
ret i32 %v
}

View File

@ -0,0 +1,52 @@
; RUN: opt < %s -argpromotion -S | FileCheck %s
; RUN: opt < %s -passes=argpromotion -S | FileCheck %s
%struct.ss = type { i32, i64 }
; Don't drop 'byval' on %X here.
; Callee with a byval struct %b, a byval i32* %X and a plain i32 %i. It
; increments field 0 of %b in place and stores 0 through %X. The expected
; output splits %b into its i32 and i64 fields, rebuilds it in a local alloca,
; and leaves %X as `i32* byval`.
define internal void @f(%struct.ss* byval %b, i32* byval %X, i32 %i) nounwind {
; CHECK-LABEL: define internal void @f(
; CHECK: i32 %[[B0:.*]], i64 %[[B1:.*]], i32* byval %X, i32 %i)
entry:
; CHECK: %[[B:.*]] = alloca %struct.ss
; CHECK: %[[B_GEP0:.*]] = getelementptr %struct.ss, %struct.ss* %[[B]], i32 0, i32 0
; CHECK: store i32 %[[B0]], i32* %[[B_GEP0]]
; CHECK: %[[B_GEP1:.*]] = getelementptr %struct.ss, %struct.ss* %[[B]], i32 0, i32 1
; CHECK: store i64 %[[B1]], i64* %[[B_GEP1]]
%tmp = getelementptr %struct.ss, %struct.ss* %b, i32 0, i32 0
; CHECK: %[[TMP:.*]] = getelementptr %struct.ss, %struct.ss* %[[B]], i32 0, i32 0
%tmp1 = load i32, i32* %tmp, align 4
; CHECK: %[[TMP1:.*]] = load i32, i32* %[[TMP]]
%tmp2 = add i32 %tmp1, 1
; CHECK: %[[TMP2:.*]] = add i32 %[[TMP1]], 1
store i32 %tmp2, i32* %tmp, align 4
; CHECK: store i32 %[[TMP2]], i32* %[[TMP]]
store i32 0, i32* %X
; CHECK: store i32 0, i32* %X
ret void
}
; Also make sure we don't drop the call zeroext attribute.
; Caller: fills a local %struct.ss with {1, 2} and passes it byval to @f, with
; a zeroext attribute on the third argument. The expected output loads both
; fields here and keeps `byval` on %X and `zeroext` on the last argument.
define i32 @test(i32* %X) {
; CHECK-LABEL: define i32 @test(
entry:
%S = alloca %struct.ss
; CHECK: %[[S:.*]] = alloca %struct.ss
%tmp1 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 0
store i32 1, i32* %tmp1, align 8
; CHECK: store i32 1
%tmp4 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 1
store i64 2, i64* %tmp4, align 4
; CHECK: store i64 2
call void @f( %struct.ss* byval %S, i32* byval %X, i32 zeroext 0)
; CHECK: %[[S_GEP0:.*]] = getelementptr %struct.ss, %struct.ss* %[[S]], i32 0, i32 0
; CHECK: %[[S0:.*]] = load i32, i32* %[[S_GEP0]]
; CHECK: %[[S_GEP1:.*]] = getelementptr %struct.ss, %struct.ss* %[[S]], i32 0, i32 1
; CHECK: %[[S1:.*]] = load i64, i64* %[[S_GEP1]]
; CHECK: call void @f(i32 %[[S0]], i64 %[[S1]], i32* byval %X, i32 zeroext 0)
ret i32 0
}

View File

@ -0,0 +1,29 @@
; RUN: opt < %s -basicaa -argpromotion -mem2reg -S | FileCheck %s
target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
; Innermost callee: loads both pointer arguments and returns their sum. The
; expected signature takes both values directly.
define internal i32 @test(i32* %X, i32* %Y) {
; CHECK-LABEL: define internal i32 @test(i32 %X.val, i32 %Y.val)
%A = load i32, i32* %X
%B = load i32, i32* %Y
%C = add i32 %A, %B
ret i32 %C
}
; Middle function: forwards its own pointer argument %B to @test together with
; a local slot holding 1. The expected output takes %B by value and passes the
; constant 1 directly.
define internal i32 @caller(i32* %B) {
; CHECK-LABEL: define internal i32 @caller(i32 %B.val1)
%A = alloca i32
store i32 1, i32* %A
%C = call i32 @test(i32* %A, i32* %B)
; CHECK: call i32 @test(i32 1, i32 %B.val1)
ret i32 %C
}
; Outermost, externally visible caller: stores 2 to a local slot and passes
; its address to @caller. The expected output passes the constant 2.
define i32 @callercaller() {
; CHECK-LABEL: define i32 @callercaller()
%B = alloca i32
store i32 2, i32* %B
%X = call i32 @caller(i32* %B)
; CHECK: call i32 @caller(i32 2)
ret i32 %X
}

View File

@ -0,0 +1,32 @@
; RUN: opt < %s -argpromotion -S | FileCheck %s
; RUN: opt < %s -passes=argpromotion -S | FileCheck %s
; Arg promotion eliminates the struct argument.
; FIXME: Should it eliminate the i32* argument?
%struct.ss = type { i32, i64 }
; Callee with a byval struct and a byval i32*: increments field 0 of %b and
; stores 0 through %X. The expected signature splits %b into %b.0/%b.1 and
; keeps %X as `i32* byval`.
define internal void @f(%struct.ss* byval %b, i32* byval %X) nounwind {
; CHECK-LABEL: define internal void @f(i32 %b.0, i64 %b.1, i32* byval %X)
entry:
%tmp = getelementptr %struct.ss, %struct.ss* %b, i32 0, i32 0
%tmp1 = load i32, i32* %tmp, align 4
%tmp2 = add i32 %tmp1, 1
store i32 %tmp2, i32* %tmp, align 4
store i32 0, i32* %X
ret void
}
; Caller: initializes a local %struct.ss to {1, 2} and passes it and %X byval
; to @f.
define i32 @test(i32* %X) {
; CHECK-LABEL: define i32 @test
entry:
%S = alloca %struct.ss
%tmp1 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 0
store i32 1, i32* %tmp1, align 8
%tmp4 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 1
store i64 2, i64* %tmp4, align 4
call void @f( %struct.ss* byval %S, i32* byval %X)
; CHECK: call void @f(i32 %{{.*}}, i64 %{{.*}}, i32* byval %{{.*}})
ret i32 0
}

View File

@ -0,0 +1,50 @@
; RUN: opt < %s -argpromotion -S | FileCheck %s
; RUN: opt < %s -passes=argpromotion -S | FileCheck %s
target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
%struct.ss = type { i32, i64 }
; byval callee with no explicit alignment on %b; increments field 0 in place.
; The FileCheck lines that follow expect an alloca with no alignment suffix.
define internal void @f(%struct.ss* byval %b) nounwind {
entry:
%tmp = getelementptr %struct.ss, %struct.ss* %b, i32 0, i32 0
%tmp1 = load i32, i32* %tmp, align 4
%tmp2 = add i32 %tmp1, 1
store i32 %tmp2, i32* %tmp, align 4
ret void
}
; CHECK-LABEL: define internal void @f(i32 %b.0, i64 %b.1)
; CHECK: alloca %struct.ss{{$}}
; CHECK: store i32 %b.0
; CHECK: store i64 %b.1
; Same body as @f, but %b is declared `byval align 32`. The FileCheck lines
; that follow expect the replacement alloca to carry `align 32`.
define internal void @g(%struct.ss* byval align 32 %b) nounwind {
entry:
%tmp = getelementptr %struct.ss, %struct.ss* %b, i32 0, i32 0
%tmp1 = load i32, i32* %tmp, align 4
%tmp2 = add i32 %tmp1, 1
store i32 %tmp2, i32* %tmp, align 4
ret void
}
; CHECK-LABEL: define internal void @g(i32 %b.0, i64 %b.1)
; CHECK: alloca %struct.ss, align 32
; CHECK: store i32 %b.0
; CHECK: store i64 %b.1
; Caller: initializes a local %struct.ss to {1, 2} and passes it byval to both
; @f and @g.
define i32 @main() nounwind {
entry:
%S = alloca %struct.ss
%tmp1 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 0
store i32 1, i32* %tmp1, align 8
%tmp4 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 1
store i64 2, i64* %tmp4, align 4
call void @f(%struct.ss* byval %S) nounwind
call void @g(%struct.ss* byval %S) nounwind
ret i32 0
}
; CHECK-LABEL: define i32 @main
; CHECK: call void @f(i32 %{{.*}}, i64 %{{.*}})
; CHECK: call void @g(i32 %{{.*}}, i64 %{{.*}})

View File

@ -0,0 +1,27 @@
; RUN: opt < %s -argpromotion -S | FileCheck %s
; RUN: opt < %s -passes=argpromotion -S | FileCheck %s
@G1 = constant i32 0
@G2 = constant i32* @G1
; Callee dereferences its i32** argument twice and returns the final i32. The
; expected signature takes that i32 directly and the body has no loads.
define internal i32 @test(i32** %x) {
; CHECK-LABEL: define internal i32 @test(
; CHECK: i32 %{{.*}})
entry:
%y = load i32*, i32** %x
%z = load i32, i32* %y
; CHECK-NOT: load
ret i32 %z
; CHECK: ret i32
}
; Caller passes the constant global @G2 (which points at @G1). The expected
; output performs both loads here and passes the resulting i32.
define i32 @caller() {
; CHECK-LABEL: define i32 @caller()
entry:
%x = call i32 @test(i32** @G2)
; CHECK: %[[Y:.*]] = load i32*, i32** @G2
; CHECK: %[[Z:.*]] = load i32, i32* %[[Y]]
; CHECK: call i32 @test(i32 %[[Z]])
ret i32 %x
}

View File

@ -0,0 +1,27 @@
; RUN: opt < %s -argpromotion -S | FileCheck %s
; RUN: opt < %s -passes=argpromotion -S | FileCheck %s
; Don't promote around control flow.
; Callee loads %P only on the %F path (when %C is false); the %T path returns
; a constant. The expected signature keeps %P as a pointer.
define internal i32 @callee(i1 %C, i32* %P) {
; CHECK-LABEL: define internal i32 @callee(
; CHECK: i1 %C, i32* %P)
entry:
br i1 %C, label %T, label %F
T:
ret i32 17
F:
%X = load i32, i32* %P
ret i32 %X
}
; Caller passes %C = true with a null pointer. The expected output contains no
; load from null in this function and leaves the call unchanged.
define i32 @foo() {
; CHECK-LABEL: define i32 @foo(
entry:
; CHECK-NOT: load i32, i32* null
%X = call i32 @callee(i1 true, i32* null)
; CHECK: call i32 @callee(i1 true, i32* null)
ret i32 %X
}

View File

@ -0,0 +1,24 @@
; RUN: opt < %s -argpromotion -S | FileCheck %s
; RUN: opt < %s -passes=argpromotion -S | FileCheck %s
; CHECK: load i32, i32* %A
target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
; Callee loads %P only on the %F path; the %T path returns the constant 17.
define internal i32 @callee(i1 %C, i32* %P) {
br i1 %C, label %T, label %F
T: ; preds = %0
ret i32 17
F: ; preds = %0
%X = load i32, i32* %P ; <i32> [#uses=1]
ret i32 %X
}
; Caller passes the address of a local alloca %A holding 17. The file's single
; FileCheck line expects a `load i32, i32* %A` to appear in the output.
define i32 @foo() {
%A = alloca i32 ; <i32*> [#uses=2]
store i32 17, i32* %A
%X = call i32 @callee( i1 false, i32* %A ) ; <i32> [#uses=1]
ret i32 %X
}

View File

@ -0,0 +1,73 @@
; RUN: opt -S < %s -inline -argpromotion | FileCheck %s
; RUN: opt -S < %s -passes=inline,argpromotion | FileCheck %s
%S = type { %S* }
; Inlining should nuke the invoke (and any inlined calls) here even with
; argument promotion running along with it.
; Entry point that invokes @hoge with a cleanup landing pad. The FileCheck
; lines expect no call or invoke to remain after the label match.
define void @zot() personality i32 (...)* @wibble {
; CHECK-LABEL: define void @zot() personality i32 (...)* @wibble
; CHECK-NOT: call
; CHECK-NOT: invoke
bb:
invoke void @hoge()
to label %bb1 unwind label %bb2
bb1:
unreachable
bb2:
%tmp = landingpad { i8*, i32 }
cleanup
unreachable
}
; Calls @spam twice, passing the addresses of @eggs and @barney, then ends in
; `unreachable`.
define internal void @hoge() {
bb:
%tmp = call fastcc i8* @spam(i1 (i8*)* @eggs)
%tmp1 = call fastcc i8* @spam(i1 (i8*)* @barney)
unreachable
}
; Takes a function pointer it never uses; the body is only `unreachable`.
define internal fastcc i8* @spam(i1 (i8*)* %arg) {
bb:
unreachable
}
; Forwards %arg to @barney, ignores the result, and ends in `unreachable`.
define internal i1 @eggs(i8* %arg) {
bb:
%tmp = call zeroext i1 @barney(i8* %arg)
unreachable
}
; Ignores %arg and returns undef.
define internal i1 @barney(i8* %arg) {
bb:
ret i1 undef
}
; Passes two local %S allocas to the recursive callee. The expected output
; still passes both as %S* pointers.
define i32 @test_inf_promote_caller(i32 %arg) {
; CHECK-LABEL: define i32 @test_inf_promote_caller(
bb:
%tmp = alloca %S
%tmp1 = alloca %S
%tmp2 = call i32 @test_inf_promote_callee(%S* %tmp, %S* %tmp1)
; CHECK: call i32 @test_inf_promote_callee(%S* %{{.*}}, %S* %{{.*}})
ret i32 0
}
; Self-recursive callee over the self-referential type %S = { %S* }: it loads
; the %S* stored in each argument and recurses on those pointers. The expected
; signature keeps both parameters as %S*.
define internal i32 @test_inf_promote_callee(%S* %arg, %S* %arg1) {
; CHECK-LABEL: define internal i32 @test_inf_promote_callee(
; CHECK: %S* %{{.*}}, %S* %{{.*}})
bb:
%tmp = getelementptr %S, %S* %arg1, i32 0, i32 0
%tmp2 = load %S*, %S** %tmp
%tmp3 = getelementptr %S, %S* %arg, i32 0, i32 0
%tmp4 = load %S*, %S** %tmp3
%tmp5 = call i32 @test_inf_promote_callee(%S* %tmp4, %S* %tmp2)
; CHECK: call i32 @test_inf_promote_callee(%S* %{{.*}}, %S* %{{.*}})
ret i32 0
}
declare i32 @wibble(...)

View File

@ -0,0 +1,49 @@
; RUN: opt < %s -argpromotion -S | FileCheck %s
; RUN: opt < %s -passes=argpromotion -S | FileCheck %s
declare void @sink(i32)
; CHECK: define internal void @test({{.*}} !dbg [[SP:![0-9]+]]
; Callee with an attached DISubprogram (!2): dereferences %X twice and passes
; the i32 to @sink. The FileCheck line above expects the rewritten definition
; to keep a !dbg attachment.
define internal void @test(i32** %X) !dbg !2 {
%1 = load i32*, i32** %X, align 8
%2 = load i32, i32* %1, align 8
call void @sink(i32 %2)
ret void
}
%struct.pair = type { i32, i32 }
; CHECK: define internal void @test_byval(i32 %{{.*}}, i32 %{{.*}})
; Empty-bodied callee taking a byval %struct.pair; the FileCheck line above
; expects it to take two i32 values instead.
define internal void @test_byval(%struct.pair* byval %P) {
ret void
}
; CHECK-LABEL: define {{.*}} @caller(
; Calls both callees with distinct debug locations (!1 on line 8, !6 on
; line 9). The expected output gives every instruction inserted before each
; call the same location as that call.
define void @caller(i32** %Y, %struct.pair* %P) {
; CHECK: load i32*, {{.*}} !dbg [[LOC_1:![0-9]+]]
; CHECK-NEXT: load i32, {{.*}} !dbg [[LOC_1]]
; CHECK-NEXT: call void @test(i32 %{{.*}}), !dbg [[LOC_1]]
call void @test(i32** %Y), !dbg !1
; CHECK: getelementptr %struct.pair, {{.*}} !dbg [[LOC_2:![0-9]+]]
; CHECK-NEXT: load i32, i32* {{.*}} !dbg [[LOC_2]]
; CHECK-NEXT: getelementptr %struct.pair, {{.*}} !dbg [[LOC_2]]
; CHECK-NEXT: load i32, i32* {{.*}} !dbg [[LOC_2]]
; CHECK-NEXT: call void @test_byval(i32 %{{.*}}, i32 %{{.*}}), !dbg [[LOC_2]]
call void @test_byval(%struct.pair* %P), !dbg !6
ret void
}
; CHECK: [[SP]] = distinct !DISubprogram(name: "test",
; CHECK: [[LOC_1]] = !DILocation(line: 8
; CHECK: [[LOC_2]] = !DILocation(line: 9
!llvm.module.flags = !{!0}
!llvm.dbg.cu = !{!3}
!0 = !{i32 2, !"Debug Info Version", i32 3}
!1 = !DILocation(line: 8, scope: !2)
!2 = distinct !DISubprogram(name: "test", file: !5, line: 3, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !3, scopeLine: 3, scope: null)
!3 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: LineTablesOnly, file: !5)
!5 = !DIFile(filename: "test.c", directory: "")
!6 = !DILocation(line: 9, scope: !2)

View File

@ -0,0 +1,59 @@
; RUN: opt < %s -argpromotion -S | FileCheck %s
; RUN: opt < %s -passes=argpromotion -S | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
%union.u = type { x86_fp80 }
%struct.s = type { double, i16, i8, [5 x i8] }
@b = internal global %struct.s { double 3.14, i16 9439, i8 25, [5 x i8] undef }, align 16
%struct.Foo = type { i32, i64 }
@a = internal global %struct.Foo { i32 1, i64 2 }, align 8
; Driver that calls each of the four internal functions once. The two
; long-double callees receive @b reinterpreted as %union.u; the other two
; receive @a.
define void @run() {
entry:
tail call i8 @UseLongDoubleUnsafely(%union.u* byval align 16 bitcast (%struct.s* @b to %union.u*))
tail call x86_fp80 @UseLongDoubleSafely(%union.u* byval align 16 bitcast (%struct.s* @b to %union.u*))
call i64 @AccessPaddingOfStruct(%struct.Foo* @a)
call i64 @CaptureAStruct(%struct.Foo* @a)
ret void
}
; CHECK: internal i8 @UseLongDoubleUnsafely(%union.u* byval align 16 %arg) {
; Reinterprets the byval %union.u as %struct.s and reads its i8 field (index
; 2). The FileCheck line above expects the signature to stay unchanged.
define internal i8 @UseLongDoubleUnsafely(%union.u* byval align 16 %arg) {
entry:
%bitcast = bitcast %union.u* %arg to %struct.s*
%gep = getelementptr inbounds %struct.s, %struct.s* %bitcast, i64 0, i32 2
%result = load i8, i8* %gep
ret i8 %result
}
; CHECK: internal x86_fp80 @UseLongDoubleSafely(x86_fp80 {{%.*}}) {
; Reads the union only as its declared x86_fp80 member. The FileCheck line
; above expects the argument to become a by-value x86_fp80.
define internal x86_fp80 @UseLongDoubleSafely(%union.u* byval align 16 %arg) {
%gep = getelementptr inbounds %union.u, %union.u* %arg, i64 0, i32 0
%fp80 = load x86_fp80, x86_fp80* %gep
ret x86_fp80 %fp80
}
; CHECK: define internal i64 @AccessPaddingOfStruct(%struct.Foo* byval %a) {
; Loads an i64 from the start of a { i32, i64 } struct through a bitcast, so
; the load is wider than the leading i32 field. The FileCheck line above
; expects the signature to stay unchanged.
define internal i64 @AccessPaddingOfStruct(%struct.Foo* byval %a) {
%p = bitcast %struct.Foo* %a to i64*
%v = load i64, i64* %p
ret i64 %v
}
; CHECK: define internal i64 @CaptureAStruct(%struct.Foo* byval %a) {
; Infinite loop that feeds %a (via a zero-index GEP and phi nodes) into a
; store to %a_ptr; the function never returns. The FileCheck line above
; expects the signature to stay unchanged.
define internal i64 @CaptureAStruct(%struct.Foo* byval %a) {
entry:
%a_ptr = alloca %struct.Foo*
br label %loop
loop:
%phi = phi %struct.Foo* [ null, %entry ], [ %gep, %loop ]
%0 = phi %struct.Foo* [ %a, %entry ], [ %0, %loop ]
store %struct.Foo* %phi, %struct.Foo** %a_ptr
%gep = getelementptr %struct.Foo, %struct.Foo* %a, i64 0
br label %loop
}

View File

@ -0,0 +1,50 @@
; RUN: opt %s -argpromotion -sroa -S | FileCheck %s
; RUN: opt %s -passes='argpromotion,function(sroa)' -S | FileCheck %s
target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
%struct.ss = type { i32, i32 }
; Argpromote + sroa should change this to passing the two integers by value.
; Callee with an inalloca struct argument: loads both i32 fields and returns
; their sum. The FileCheck lines that follow expect no load to remain.
define internal i32 @f(%struct.ss* inalloca %s) {
entry:
%f0 = getelementptr %struct.ss, %struct.ss* %s, i32 0, i32 0
%f1 = getelementptr %struct.ss, %struct.ss* %s, i32 0, i32 1
%a = load i32, i32* %f0, align 4
%b = load i32, i32* %f1, align 4
%r = add i32 %a, %b
ret i32 %r
}
; CHECK-LABEL: define internal i32 @f
; CHECK-NOT: load
; CHECK: ret
; Caller: builds the inalloca argument on the stack with fields {1, 2} and
; calls @f. The FileCheck lines that follow expect no load to remain here.
define i32 @main() {
entry:
%S = alloca inalloca %struct.ss
%f0 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 0
%f1 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 1
store i32 1, i32* %f0, align 4
store i32 2, i32* %f1, align 4
%r = call i32 @f(%struct.ss* inalloca %S)
ret i32 %r
}
; CHECK-LABEL: define i32 @main
; CHECK-NOT: load
; CHECK: ret
; Argpromote can't promote %a because of the icmp use.
; Compares its two pointer arguments for equality without loading through
; either; the expected signature is unchanged.
define internal i1 @g(%struct.ss* %a, %struct.ss* inalloca %b) nounwind {
; CHECK: define internal i1 @g(%struct.ss* %a, %struct.ss* inalloca %b)
entry:
%c = icmp eq %struct.ss* %a, %b
ret i1 %c
}
; Passes the same inalloca allocation as both arguments of @g; the expected
; call is unchanged.
define i32 @test() {
entry:
%S = alloca inalloca %struct.ss
%c = call i1 @g(%struct.ss* %S, %struct.ss* inalloca %S)
; CHECK: call i1 @g(%struct.ss* %S, %struct.ss* inalloca %S)
ret i32 0
}

View File

@ -0,0 +1,50 @@
; Check that when argument promotion changes a function in some parent node of
; the call graph, any analyses that happened to be cached for that function are
; actually invalidated. We are using `demanded-bits` here because when printed
; it will end up caching a value for every instruction, making it easy to
; detect the instruction-level changes that will fail here. With improper
; invalidation this will crash in the second printer as it tries to reuse
; now-invalid demanded bits.
;
; RUN: opt < %s -passes='function(print<demanded-bits>),cgscc(argpromotion,function(print<demanded-bits>))' -S | FileCheck %s
@G = constant i32 0
; Leaf callee: returns the i32 loaded from %x. The expected output takes the
; value directly and returns it.
define internal i32 @a(i32* %x) {
; CHECK-LABEL: define internal i32 @a(
; CHECK-SAME: i32 %[[V:.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: ret i32 %[[V]]
; CHECK-NEXT: }
entry:
%v = load i32, i32* %x
ret i32 %v
}
; Direct caller of @a with the constant global @G; the expected output loads
; @G here and passes the value.
define i32 @b() {
; CHECK-LABEL: define i32 @b()
; CHECK-NEXT: entry:
; CHECK-NEXT: %[[L:.*]] = load i32, i32* @G
; CHECK-NEXT: %[[V:.*]] = call i32 @a(i32 %[[L]])
; CHECK-NEXT: ret i32 %[[V]]
; CHECK-NEXT: }
entry:
%v = call i32 @a(i32* @G)
ret i32 %v
}
; Calls both @a (directly) and @b, then adds the results. Only the @a call
; site is expected to change (a load of @G is inserted before it).
define i32 @c() {
; CHECK-LABEL: define i32 @c()
; CHECK-NEXT: entry:
; CHECK-NEXT: %[[L:.*]] = load i32, i32* @G
; CHECK-NEXT: %[[V1:.*]] = call i32 @a(i32 %[[L]])
; CHECK-NEXT: %[[V2:.*]] = call i32 @b()
; CHECK-NEXT: %[[RESULT:.*]] = add i32 %[[V1]], %[[V2]]
; CHECK-NEXT: ret i32 %[[RESULT]]
; CHECK-NEXT: }
entry:
%v1 = call i32 @a(i32* @G)
%v2 = call i32 @b()
%result = add i32 %v1, %v2
ret i32 %result
}

View File

@ -0,0 +1,45 @@
; RUN: opt < %s -argpromotion -S | FileCheck %s
; PR36543
; Don't promote arguments of musttail callee
%T = type { i32, i32, i32, i32 }
; CHECK-LABEL: define internal i32 @test(%T* %p)
; Callee that is the target of a musttail call in @caller: sums fields 3 and 2
; of %p. The FileCheck line above expects the signature to stay unchanged.
define internal i32 @test(%T* %p) {
%a.gep = getelementptr %T, %T* %p, i64 0, i32 3
%b.gep = getelementptr %T, %T* %p, i64 0, i32 2
%a = load i32, i32* %a.gep
%b = load i32, i32* %b.gep
%v = add i32 %a, %b
ret i32 %v
}
; CHECK-LABEL: define i32 @caller(%T* %p)
; Forwards %p to @test through a musttail call and returns its result.
define i32 @caller(%T* %p) {
%v = musttail call i32 @test(%T* %p)
ret i32 %v
}
; Don't promote arguments of musttail caller
; Target of the musttail call inside @test2; ignores both arguments and
; returns 0.
define i32 @foo(%T* %p, i32 %v) {
ret i32 0
}
; CHECK-LABEL: define internal i32 @test2(%T* %p, i32 %p2)
; Internal function that itself ends in a musttail call to @foo, whose
; parameter list it matches. The FileCheck line above expects the signature to
; stay unchanged.
define internal i32 @test2(%T* %p, i32 %p2) {
%a.gep = getelementptr %T, %T* %p, i64 0, i32 3
%b.gep = getelementptr %T, %T* %p, i64 0, i32 2
%a = load i32, i32* %a.gep
%b = load i32, i32* %b.gep
%v = add i32 %a, %b
%ca = musttail call i32 @foo(%T* undef, i32 %v)
ret i32 %ca
}
; CHECK-LABEL: define i32 @caller2(%T* %g)
; Ordinary (non-musttail) caller of @test2.
define i32 @caller2(%T* %g) {
%v = call i32 @test2(%T* %g, i32 0)
ret i32 %v
}

View File

@ -0,0 +1,23 @@
; RUN: opt < %s -argpromotion -S | FileCheck %s
; Don't promote parameters of/arguments to naked functions
@g = common global i32 0, align 4
; Caller of the naked function @foo; the expected call still passes @g.
define i32 @bar() {
entry:
%call = call i32 @foo(i32* @g)
; CHECK: %call = call i32 @foo(i32* @g)
ret i32 %call
}
; Function in attribute group #0 (naked). Its unnamed pointer argument is
; never referenced by an IR instruction; the body is an inline-asm call
; followed by `unreachable`.
define internal i32 @foo(i32*) #0 {
entry:
%retval = alloca i32, align 4
call void asm sideeffect "ldr r0, [r0] \0Abx lr \0A", ""()
unreachable
}
; CHECK: define internal i32 @foo(i32*)
attributes #0 = { naked }

View File

@ -0,0 +1,24 @@
; RUN: opt < %s -argpromotion -S | FileCheck %s
; ArgumentPromotion should preserve the default function address space
; from the data layout.
target datalayout = "e-P1-p:16:8-i8:8-i16:8-i32:8-i64:8-f32:8-f64:8-n8-a:8"
@g = common global i32 0, align 4
; Caller of @foo. The expected call drops the argument and is printed with
; `addrspace(1)`, matching the P1 program address space in the datalayout.
define i32 @bar() {
entry:
%call = call i32 @foo(i32* @g)
; CHECK: %call = call addrspace(1) i32 @foo()
ret i32 %call
}
; CHECK: define internal i32 @foo() addrspace(1)
; Callee whose unnamed pointer argument is never referenced by an IR
; instruction. The FileCheck line above expects the argument to be removed and
; the definition to be printed with `addrspace(1)`.
define internal i32 @foo(i32*) {
entry:
%retval = alloca i32, align 4
call void asm sideeffect "ldr r0, [r0] \0Abx lr \0A", ""()
unreachable
}

View File

@ -0,0 +1,33 @@
; RUN: opt -S -argpromotion < %s | FileCheck %s
; RUN: opt -S -passes=argpromotion < %s | FileCheck %s
; RUN: opt -S -debugify -o /dev/null < %s
target triple = "x86_64-pc-windows-msvc"
; Callee with an unused, unnamed i8* argument; it only calls @thunk.
define internal void @callee(i8*) {
entry:
call void @thunk()
ret void
}
; Calls @callee from inside a cleanuppad with a "funclet" operand bundle. The
; FileCheck lines that follow expect the rewritten call to drop the argument
; but keep the bundle.
define void @test1() personality i32 (...)* @__CxxFrameHandler3 {
entry:
invoke void @thunk()
to label %out unwind label %cpad
out:
ret void
cpad:
%pad = cleanuppad within none []
call void @callee(i8* null) [ "funclet"(token %pad) ]
cleanupret from %pad unwind to caller
}
; CHECK-LABEL: define void @test1(
; CHECK: %[[pad:.*]] = cleanuppad within none []
; CHECK-NEXT: call void @callee() [ "funclet"(token %[[pad]]) ]
; CHECK-NEXT: cleanupret from %[[pad]] unwind to caller
declare void @thunk()
declare i32 @__CxxFrameHandler3(...)

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,23 @@
; RUN: opt < %s -argpromotion -S | FileCheck %s
; PR 32917
@b = common local_unnamed_addr global i32 0, align 4
@a = common local_unnamed_addr global i32 0, align 4
; Builds a pointer from the sign-extended value loaded from @b and passes it to
; @fn1.
define i32 @fn2() local_unnamed_addr {
%1 = load i32, i32* @b, align 4
%2 = sext i32 %1 to i64
%3 = inttoptr i64 %2 to i32*
call fastcc void @fn1(i32* %3)
ret i32 undef
}
; Loads the i32 one element before the incoming pointer (GEP index -1) and
; stores it to @a. The expected output must still contain a GEP with index -1,
; not the same bits printed as the unsigned value 4294967295.
define internal fastcc void @fn1(i32* nocapture readonly) unnamed_addr {
%2 = getelementptr inbounds i32, i32* %0, i64 -1
%3 = load i32, i32* %2, align 4
store i32 %3, i32* @a, align 4
ret void
}
; CHECK: getelementptr {{.*}} -1
; CHECK-NOT: getelementptr {{.*}} 4294967295

View File

@ -0,0 +1,38 @@
; RUN: opt -argpromotion -verify -dse -S %s -o - | FileCheck %s
; Fix for PR33641. ArgumentPromotion removed the argument to bar but left the call to
; dbg.value which still used the removed argument.
%p_t = type i16*
%fun_t = type void (%p_t)*
; Stores the address of @bar into a stack slot; @bar is never called directly in
; this file.
define void @foo() {
%tmp = alloca %fun_t
store %fun_t @bar, %fun_t* %tmp
ret void
}
; The only use of %p is the llvm.dbg.value call. Once the argument is removed,
; that use is expected to be rewritten to undef instead of being left dangling.
define internal void @bar(%p_t %p) {
call void @llvm.dbg.value(metadata %p_t %p, metadata !4, metadata !5), !dbg !6
ret void
}
declare void @llvm.dbg.value(metadata, metadata, metadata)
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!2}
!0 = distinct !DICompileUnit(language: DW_LANG_C, file: !1)
!1 = !DIFile(filename: "test.c", directory: "")
!2 = !{i32 2, !"Debug Info Version", i32 3}
!3 = distinct !DISubprogram(name: "bar", unit: !0)
!4 = !DILocalVariable(name: "p", scope: !3)
!5 = !DIExpression()
!6 = !DILocation(line: 1, column: 1, scope: !3)
; The %p argument should be removed, and the use of it in dbg.value should be
; changed to undef.
; CHECK: define internal void @bar() {
; CHECK-NEXT: call void @llvm.dbg.value(metadata i16* undef
; CHECK-NEXT: ret void
; CHECK-NEXT: }

View File

@ -0,0 +1,23 @@
; RUN: opt -argpromotion -mem2reg -S < %s | FileCheck %s
target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
; Checks if !prof metadata is correct in argpromotion.
; Passes a stack slot holding 42 to @promote_i32_ptr. The call carries !prof
; metadata, which is expected to remain attached after the pointer argument has
; been promoted to a plain i32 (the constant 42 in the expected call relies on
; the -mem2reg run that follows argpromotion).
define void @caller() #0 {
%x = alloca i32
store i32 42, i32* %x
call void @promote_i32_ptr(i32* %x), !prof !0
; CHECK: call void @promote_i32_ptr(i32 42), !prof ![[PROF:[0-9]]]
ret void
}
; Internal callee that only loads through %xp and forwards the loaded value to
; @use_i32.
define internal void @promote_i32_ptr(i32* %xp) {
%x = load i32, i32* %xp
call void @use_i32(i32 %x)
ret void
}
declare void @use_i32(i32)
; CHECK: ![[PROF]] = !{!"branch_weights", i32 30}
!0 = !{!"branch_weights", i32 30}

View File

@ -0,0 +1,53 @@
; RUN: opt < %s -argpromotion -S | FileCheck %s
; RUN: opt < %s -passes=argpromotion -S | FileCheck %s
; PR17906
; When we promote two arguments of different types in a single function,
; before the fix, we used the same tag for the two newly-created loads.
; This test case makes sure that we correctly transfer the tbaa tags from the
; original loads to the newly-created loads when promoting pointer arguments.
@a = global i32* null, align 8
@e = global i32** @a, align 8
@g = global i32 0, align 4
@c = global i64 0, align 8
@d = global i8 0, align 1
; Loads through both pointer arguments with different !tbaa tags: !1 ("long")
; on the i64 load from %p2 and !5 ("int") on the i32 load from %p1. Only the
; i32 value is used afterwards (truncated and stored to @d); %conv is unused.
define internal fastcc void @fn(i32* nocapture readonly %p1, i64* nocapture readonly %p2) {
entry:
%0 = load i64, i64* %p2, align 8, !tbaa !1
%conv = trunc i64 %0 to i32
%1 = load i32, i32* %p1, align 4, !tbaa !5
%conv1 = trunc i32 %1 to i8
store i8 %conv1, i8* @d, align 1, !tbaa !7
ret void
}
; Calls @fn with @g and @c. In the expected output the loads of @g and @c appear
; here in the caller, each carrying the tbaa tag of the matching original load
; in @fn ("int" for @g, "long" for @c).
define i32 @main() {
entry:
; CHECK-LABEL: main
; CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa ![[I32:[0-9]+]]
; CHECK: %g.val = load i32, i32* @g, align 4, !tbaa ![[I32]]
; CHECK: %c.val = load i64, i64* @c, align 8, !tbaa ![[LONG:[0-9]+]]
%0 = load i32**, i32*** @e, align 8, !tbaa !8
store i32* @g, i32** %0, align 8, !tbaa !8
%1 = load i32*, i32** @a, align 8, !tbaa !8
store i32 1, i32* %1, align 4, !tbaa !5
call fastcc void @fn(i32* @g, i64* @c)
ret i32 0
}
!1 = !{!2, !2, i64 0}
!2 = !{!"long", !3, i64 0}
!3 = !{!"omnipotent char", !4, i64 0}
!4 = !{!"Simple C/C++ TBAA"}
!5 = !{!6, !6, i64 0}
!6 = !{!"int", !3, i64 0}
!7 = !{!3, !3, i64 0}
!8 = !{!9, !9, i64 0}
!9 = !{!"any pointer", !3, i64 0}
; CHECK: ![[I32]] = !{![[I32_TYPE:[0-9]+]], ![[I32_TYPE]], i64 0}
; CHECK: ![[I32_TYPE]] = !{!"int", !{{.*}}, i64 0}
; CHECK: ![[LONG]] = !{![[LONG_TYPE:[0-9]+]], ![[LONG_TYPE]], i64 0}
; CHECK: ![[LONG_TYPE]] = !{!"long", !{{.*}}, i64 0}

View File

@ -0,0 +1,29 @@
; RUN: opt < %s -argpromotion -S | FileCheck %s
; RUN: opt < %s -passes=argpromotion -S | FileCheck %s
target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-windows-msvc"
; CHECK: define internal void @add(i32 %[[THIS1:.*]], i32 %[[THIS2:.*]], i32* noalias %[[SR:.*]])
; Adds the two fields of %this and stores the sum through the sret pointer %r.
; In the expected output the struct pointer is replaced by two i32 arguments
; and %r is marked noalias instead of sret.
define internal void @add({i32, i32}* %this, i32* sret %r) {
%ap = getelementptr {i32, i32}, {i32, i32}* %this, i32 0, i32 0
%bp = getelementptr {i32, i32}, {i32, i32}* %this, i32 0, i32 1
%a = load i32, i32* %ap
%b = load i32, i32* %bp
; CHECK: %[[AB:.*]] = add i32 %[[THIS1]], %[[THIS2]]
%ab = add i32 %a, %b
; CHECK: store i32 %[[AB]], i32* %[[SR]]
store i32 %ab, i32* %r
ret void
}
; CHECK: define void @f()
; Caller of @add. The rewritten call is expected to pass two i32 values plus %r
; as a noalias pointer.
define void @f() {
; CHECK: %[[R:.*]] = alloca i32
%r = alloca i32
%pair = alloca {i32, i32}
; CHECK: call void @add(i32 %{{.*}}, i32 %{{.*}}, i32* noalias %[[R]])
call void @add({i32, i32}* %pair, i32* sret %r)
ret void
}

View File

@ -0,0 +1,23 @@
; RUN: opt %s -argpromotion -S -o - | FileCheck %s
; RUN: opt %s -passes=argpromotion -S -o - | FileCheck %s
; PR14710
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
%pair = type { i32, i32 }
declare i8* @foo(%pair*)
; Receives %pair byval and hands its address to @foo with a tail call. In the
; expected output the byval argument is split into two i32 arguments, %Data
; becomes an alloca in this function, and the call is no longer marked tail.
define internal void @bar(%pair* byval %Data) {
; CHECK: define internal void @bar(i32 %Data.0, i32 %Data.1)
; CHECK: %Data = alloca %pair
; CHECK-NOT: tail
; CHECK: call i8* @foo(%pair* %Data)
tail call i8* @foo(%pair* %Data)
ret void
}
; Externally visible caller that forwards its own byval %Data to @bar.
define void @zed(%pair* byval %Data) {
call void @bar(%pair* byval %Data)
ret void
}

View File

@ -0,0 +1,29 @@
; RUN: opt < %s -argpromotion -S | FileCheck %s
; RUN: opt < %s -passes=argpromotion -S | FileCheck %s
; Unused arguments from variadic functions cannot be eliminated as that changes
; their classification according to the SysV amd64 ABI. Clang and other frontends
; bake in the classification when they use things like byval, as in this test.
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
%struct.tt0 = type { i64, i64 }
%struct.__va_list_tag = type { i32, i32, i8*, i8* }
@t45 = internal global %struct.tt0 { i64 1335139741, i64 438042995 }, align 8
; Function Attrs: nounwind uwtable
; Calls the variadic @callee_t0f with five undef pointers for the fixed
; parameters and @t45 passed byval in the variadic part.
define i32 @main(i32 %argc, i8** nocapture readnone %argv) #0 {
entry:
tail call void (i8*, i8*, i8*, i8*, i8*, ...) @callee_t0f(i8* undef, i8* undef, i8* undef, i8* undef, i8* undef, %struct.tt0* byval align 8 @t45)
ret i32 0
}
; Function Attrs: nounwind uwtable
; Variadic callee with five unused fixed arguments and an empty body. Its
; signature is expected to come out of the pass unchanged.
define internal void @callee_t0f(i8* nocapture readnone %tp13, i8* nocapture readnone %tp14, i8* nocapture readnone %tp15, i8* nocapture readnone %tp16, i8* nocapture readnone %tp17, ...) {
entry:
ret void
}
; CHECK-LABEL: define internal void @callee_t0f(i8* nocapture readnone %tp13, i8* nocapture readnone %tp14, i8* nocapture readnone %tp15, i8* nocapture readnone %tp16, i8* nocapture readnone %tp17, ...)

View File

@ -0,0 +1,47 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -mtriple=aarch64-linux-gnu -atomic-expand %s | FileCheck %s
define float @test_atomicrmw_fadd_f32(float* %ptr, float %value) {
; CHECK-LABEL: @test_atomicrmw_fadd_f32(
; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[PTR:%.*]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[PTR]] to i32*
; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
; CHECK-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg i32* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
; CHECK-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret float [[TMP6]]
;
%res = atomicrmw fadd float* %ptr, float %value seq_cst
ret float %res
}
define float @test_atomicrmw_fsub_f32(float* %ptr, float %value) {
; CHECK-LABEL: @test_atomicrmw_fsub_f32(
; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[PTR:%.*]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = fsub float [[LOADED]], [[VALUE:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[PTR]] to i32*
; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
; CHECK-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg i32* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
; CHECK-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret float [[TMP6]]
;
%res = atomicrmw fsub float* %ptr, float %value seq_cst
ret float %res
}

View File

@ -0,0 +1,57 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -mtriple=aarch64-- -atomic-expand %s | FileCheck %s
define void @atomic_swap_f16(half* %ptr, half %val) nounwind {
; CHECK-LABEL: @atomic_swap_f16(
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.aarch64.ldaxr.p0f16(half* [[PTR:%.*]])
; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i16
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16 [[TMP2]] to half
; CHECK-NEXT: [[TMP4:%.*]] = bitcast half [[VAL:%.*]] to i16
; CHECK-NEXT: [[TMP5:%.*]] = zext i16 [[TMP4]] to i64
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.aarch64.stxr.p0f16(i64 [[TMP5]], half* [[PTR]])
; CHECK-NEXT: [[TRYAGAIN:%.*]] = icmp ne i32 [[TMP6]], 0
; CHECK-NEXT: br i1 [[TRYAGAIN]], label [[ATOMICRMW_START]], label [[ATOMICRMW_END:%.*]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret void
;
%t1 = atomicrmw xchg half* %ptr, half %val acquire
ret void
}
define void @atomic_swap_f32(float* %ptr, float %val) nounwind {
; CHECK-LABEL: @atomic_swap_f32(
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.aarch64.ldaxr.p0f32(float* [[PTR:%.*]])
; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to float
; CHECK-NEXT: [[TMP4:%.*]] = bitcast float [[VAL:%.*]] to i32
; CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.aarch64.stxr.p0f32(i64 [[TMP5]], float* [[PTR]])
; CHECK-NEXT: [[TRYAGAIN:%.*]] = icmp ne i32 [[TMP6]], 0
; CHECK-NEXT: br i1 [[TRYAGAIN]], label [[ATOMICRMW_START]], label [[ATOMICRMW_END:%.*]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret void
;
%t1 = atomicrmw xchg float* %ptr, float %val acquire
ret void
}
define void @atomic_swap_f64(double* %ptr, double %val) nounwind {
; CHECK-LABEL: @atomic_swap_f64(
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.aarch64.ldaxr.p0f64(double* [[PTR:%.*]])
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[TMP1]] to double
; CHECK-NEXT: [[TMP3:%.*]] = bitcast double [[VAL:%.*]] to i64
; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.aarch64.stxr.p0f64(i64 [[TMP3]], double* [[PTR]])
; CHECK-NEXT: [[TRYAGAIN:%.*]] = icmp ne i32 [[TMP4]], 0
; CHECK-NEXT: br i1 [[TRYAGAIN]], label [[ATOMICRMW_START]], label [[ATOMICRMW_END:%.*]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret void
;
%t1 = atomicrmw xchg double* %ptr, double %val acquire
ret void
}

View File

@ -0,0 +1,3 @@
# Mark every test in this directory as unsupported unless 'AArch64' is among
# the targets listed in config.root.targets. `config` is not defined in this
# file; it is expected to be supplied by lit when the file is loaded.
if 'AArch64' not in config.root.targets:
    config.unsupported = True

View File

@ -0,0 +1,264 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -atomic-expand %s | FileCheck -check-prefix=CI %s
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -atomic-expand %s | FileCheck -check-prefix=GFX9 %s
define float @test_atomicrmw_fadd_f32_flat(float* %ptr, float %value) {
; CI-LABEL: @test_atomicrmw_fadd_f32_flat(
; CI-NEXT: [[TMP1:%.*]] = load float, float* [[PTR:%.*]], align 4
; CI-NEXT: br label [[ATOMICRMW_START:%.*]]
; CI: atomicrmw.start:
; CI-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; CI-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]]
; CI-NEXT: [[TMP2:%.*]] = bitcast float* [[PTR]] to i32*
; CI-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
; CI-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
; CI-NEXT: [[TMP5:%.*]] = cmpxchg i32* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst
; CI-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
; CI-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
; CI-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
; CI-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CI: atomicrmw.end:
; CI-NEXT: ret float [[TMP6]]
;
; GFX9-LABEL: @test_atomicrmw_fadd_f32_flat(
; GFX9-NEXT: [[TMP1:%.*]] = load float, float* [[PTR:%.*]], align 4
; GFX9-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX9: atomicrmw.start:
; GFX9-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; GFX9-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]]
; GFX9-NEXT: [[TMP2:%.*]] = bitcast float* [[PTR]] to i32*
; GFX9-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
; GFX9-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
; GFX9-NEXT: [[TMP5:%.*]] = cmpxchg i32* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst
; GFX9-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
; GFX9-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
; GFX9-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
; GFX9-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX9: atomicrmw.end:
; GFX9-NEXT: ret float [[TMP6]]
;
%res = atomicrmw fadd float* %ptr, float %value seq_cst
ret float %res
}
define float @test_atomicrmw_fadd_f32_global(float addrspace(1)* %ptr, float %value) {
; CI-LABEL: @test_atomicrmw_fadd_f32_global(
; CI-NEXT: [[TMP1:%.*]] = load float, float addrspace(1)* [[PTR:%.*]], align 4
; CI-NEXT: br label [[ATOMICRMW_START:%.*]]
; CI: atomicrmw.start:
; CI-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; CI-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]]
; CI-NEXT: [[TMP2:%.*]] = bitcast float addrspace(1)* [[PTR]] to i32 addrspace(1)*
; CI-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
; CI-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
; CI-NEXT: [[TMP5:%.*]] = cmpxchg i32 addrspace(1)* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst
; CI-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
; CI-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
; CI-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
; CI-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CI: atomicrmw.end:
; CI-NEXT: ret float [[TMP6]]
;
; GFX9-LABEL: @test_atomicrmw_fadd_f32_global(
; GFX9-NEXT: [[TMP1:%.*]] = load float, float addrspace(1)* [[PTR:%.*]], align 4
; GFX9-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX9: atomicrmw.start:
; GFX9-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; GFX9-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]]
; GFX9-NEXT: [[TMP2:%.*]] = bitcast float addrspace(1)* [[PTR]] to i32 addrspace(1)*
; GFX9-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
; GFX9-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
; GFX9-NEXT: [[TMP5:%.*]] = cmpxchg i32 addrspace(1)* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst
; GFX9-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
; GFX9-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
; GFX9-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
; GFX9-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX9: atomicrmw.end:
; GFX9-NEXT: ret float [[TMP6]]
;
%res = atomicrmw fadd float addrspace(1)* %ptr, float %value seq_cst
ret float %res
}
define float @test_atomicrmw_fadd_f32_local(float addrspace(3)* %ptr, float %value) {
; CI-LABEL: @test_atomicrmw_fadd_f32_local(
; CI-NEXT: [[TMP1:%.*]] = load float, float addrspace(3)* [[PTR:%.*]], align 4
; CI-NEXT: br label [[ATOMICRMW_START:%.*]]
; CI: atomicrmw.start:
; CI-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; CI-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]]
; CI-NEXT: [[TMP2:%.*]] = bitcast float addrspace(3)* [[PTR]] to i32 addrspace(3)*
; CI-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
; CI-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
; CI-NEXT: [[TMP5:%.*]] = cmpxchg i32 addrspace(3)* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst
; CI-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
; CI-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
; CI-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
; CI-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CI: atomicrmw.end:
; CI-NEXT: ret float [[TMP6]]
;
; GFX9-LABEL: @test_atomicrmw_fadd_f32_local(
; GFX9-NEXT: [[RES:%.*]] = atomicrmw fadd float addrspace(3)* [[PTR:%.*]], float [[VALUE:%.*]] seq_cst
; GFX9-NEXT: ret float [[RES]]
;
%res = atomicrmw fadd float addrspace(3)* %ptr, float %value seq_cst
ret float %res
}
define half @test_atomicrmw_fadd_f16_flat(half* %ptr, half %value) {
; CI-LABEL: @test_atomicrmw_fadd_f16_flat(
; CI-NEXT: [[RES:%.*]] = atomicrmw fadd half* [[PTR:%.*]], half [[VALUE:%.*]] seq_cst
; CI-NEXT: ret half [[RES]]
;
; GFX9-LABEL: @test_atomicrmw_fadd_f16_flat(
; GFX9-NEXT: [[RES:%.*]] = atomicrmw fadd half* [[PTR:%.*]], half [[VALUE:%.*]] seq_cst
; GFX9-NEXT: ret half [[RES]]
;
%res = atomicrmw fadd half* %ptr, half %value seq_cst
ret half %res
}
define half @test_atomicrmw_fadd_f16_global(half addrspace(1)* %ptr, half %value) {
; CI-LABEL: @test_atomicrmw_fadd_f16_global(
; CI-NEXT: [[RES:%.*]] = atomicrmw fadd half addrspace(1)* [[PTR:%.*]], half [[VALUE:%.*]] seq_cst
; CI-NEXT: ret half [[RES]]
;
; GFX9-LABEL: @test_atomicrmw_fadd_f16_global(
; GFX9-NEXT: [[RES:%.*]] = atomicrmw fadd half addrspace(1)* [[PTR:%.*]], half [[VALUE:%.*]] seq_cst
; GFX9-NEXT: ret half [[RES]]
;
%res = atomicrmw fadd half addrspace(1)* %ptr, half %value seq_cst
ret half %res
}
define half @test_atomicrmw_fadd_f16_local(half addrspace(3)* %ptr, half %value) {
; CI-LABEL: @test_atomicrmw_fadd_f16_local(
; CI-NEXT: [[RES:%.*]] = atomicrmw fadd half addrspace(3)* [[PTR:%.*]], half [[VALUE:%.*]] seq_cst
; CI-NEXT: ret half [[RES]]
;
; GFX9-LABEL: @test_atomicrmw_fadd_f16_local(
; GFX9-NEXT: [[RES:%.*]] = atomicrmw fadd half addrspace(3)* [[PTR:%.*]], half [[VALUE:%.*]] seq_cst
; GFX9-NEXT: ret half [[RES]]
;
%res = atomicrmw fadd half addrspace(3)* %ptr, half %value seq_cst
ret half %res
}
define double @test_atomicrmw_fadd_f64_flat(double* %ptr, double %value) {
; CI-LABEL: @test_atomicrmw_fadd_f64_flat(
; CI-NEXT: [[TMP1:%.*]] = load double, double* [[PTR:%.*]], align 8
; CI-NEXT: br label [[ATOMICRMW_START:%.*]]
; CI: atomicrmw.start:
; CI-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; CI-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE:%.*]]
; CI-NEXT: [[TMP2:%.*]] = bitcast double* [[PTR]] to i64*
; CI-NEXT: [[TMP3:%.*]] = bitcast double [[NEW]] to i64
; CI-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
; CI-NEXT: [[TMP5:%.*]] = cmpxchg i64* [[TMP2]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst
; CI-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
; CI-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
; CI-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
; CI-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CI: atomicrmw.end:
; CI-NEXT: ret double [[TMP6]]
;
; GFX9-LABEL: @test_atomicrmw_fadd_f64_flat(
; GFX9-NEXT: [[TMP1:%.*]] = load double, double* [[PTR:%.*]], align 8
; GFX9-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX9: atomicrmw.start:
; GFX9-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; GFX9-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE:%.*]]
; GFX9-NEXT: [[TMP2:%.*]] = bitcast double* [[PTR]] to i64*
; GFX9-NEXT: [[TMP3:%.*]] = bitcast double [[NEW]] to i64
; GFX9-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
; GFX9-NEXT: [[TMP5:%.*]] = cmpxchg i64* [[TMP2]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst
; GFX9-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
; GFX9-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
; GFX9-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
; GFX9-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX9: atomicrmw.end:
; GFX9-NEXT: ret double [[TMP6]]
;
%res = atomicrmw fadd double* %ptr, double %value seq_cst
ret double %res
}
define double @test_atomicrmw_fadd_f64_global(double addrspace(1)* %ptr, double %value) {
; CI-LABEL: @test_atomicrmw_fadd_f64_global(
; CI-NEXT: [[TMP1:%.*]] = load double, double addrspace(1)* [[PTR:%.*]], align 8
; CI-NEXT: br label [[ATOMICRMW_START:%.*]]
; CI: atomicrmw.start:
; CI-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; CI-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE:%.*]]
; CI-NEXT: [[TMP2:%.*]] = bitcast double addrspace(1)* [[PTR]] to i64 addrspace(1)*
; CI-NEXT: [[TMP3:%.*]] = bitcast double [[NEW]] to i64
; CI-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
; CI-NEXT: [[TMP5:%.*]] = cmpxchg i64 addrspace(1)* [[TMP2]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst
; CI-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
; CI-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
; CI-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
; CI-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CI: atomicrmw.end:
; CI-NEXT: ret double [[TMP6]]
;
; GFX9-LABEL: @test_atomicrmw_fadd_f64_global(
; GFX9-NEXT: [[TMP1:%.*]] = load double, double addrspace(1)* [[PTR:%.*]], align 8
; GFX9-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX9: atomicrmw.start:
; GFX9-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; GFX9-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE:%.*]]
; GFX9-NEXT: [[TMP2:%.*]] = bitcast double addrspace(1)* [[PTR]] to i64 addrspace(1)*
; GFX9-NEXT: [[TMP3:%.*]] = bitcast double [[NEW]] to i64
; GFX9-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
; GFX9-NEXT: [[TMP5:%.*]] = cmpxchg i64 addrspace(1)* [[TMP2]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst
; GFX9-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
; GFX9-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
; GFX9-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
; GFX9-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX9: atomicrmw.end:
; GFX9-NEXT: ret double [[TMP6]]
;
%res = atomicrmw fadd double addrspace(1)* %ptr, double %value seq_cst
ret double %res
}
define double @test_atomicrmw_fadd_f64_local(double addrspace(3)* %ptr, double %value) {
; CI-LABEL: @test_atomicrmw_fadd_f64_local(
; CI-NEXT: [[TMP1:%.*]] = load double, double addrspace(3)* [[PTR:%.*]], align 8
; CI-NEXT: br label [[ATOMICRMW_START:%.*]]
; CI: atomicrmw.start:
; CI-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; CI-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE:%.*]]
; CI-NEXT: [[TMP2:%.*]] = bitcast double addrspace(3)* [[PTR]] to i64 addrspace(3)*
; CI-NEXT: [[TMP3:%.*]] = bitcast double [[NEW]] to i64
; CI-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
; CI-NEXT: [[TMP5:%.*]] = cmpxchg i64 addrspace(3)* [[TMP2]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst
; CI-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
; CI-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
; CI-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
; CI-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CI: atomicrmw.end:
; CI-NEXT: ret double [[TMP6]]
;
; GFX9-LABEL: @test_atomicrmw_fadd_f64_local(
; GFX9-NEXT: [[TMP1:%.*]] = load double, double addrspace(3)* [[PTR:%.*]], align 8
; GFX9-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX9: atomicrmw.start:
; GFX9-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; GFX9-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE:%.*]]
; GFX9-NEXT: [[TMP2:%.*]] = bitcast double addrspace(3)* [[PTR]] to i64 addrspace(3)*
; GFX9-NEXT: [[TMP3:%.*]] = bitcast double [[NEW]] to i64
; GFX9-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
; GFX9-NEXT: [[TMP5:%.*]] = cmpxchg i64 addrspace(3)* [[TMP2]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst
; GFX9-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
; GFX9-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
; GFX9-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
; GFX9-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX9: atomicrmw.end:
; GFX9-NEXT: ret double [[TMP6]]
;
%res = atomicrmw fadd double addrspace(3)* %ptr, double %value seq_cst
ret double %res
}

View File

@ -0,0 +1,201 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -atomic-expand %s | FileCheck -check-prefix=GCN %s
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -atomic-expand %s | FileCheck -check-prefix=GCN %s
; f32 case on a pointer with no explicit address space. The instruction under
; test is atomicrmw fsub, so the function is named fsub like the other tests in
; this file. The expected expansion is a load followed by a cmpxchg loop that
; computes the new value with fsub.
define float @test_atomicrmw_fsub_f32_flat(float* %ptr, float %value) {
; GCN-LABEL: @test_atomicrmw_fsub_f32_flat(
; GCN-NEXT: [[TMP1:%.*]] = load float, float* [[PTR:%.*]], align 4
; GCN-NEXT: br label [[ATOMICRMW_START:%.*]]
; GCN: atomicrmw.start:
; GCN-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; GCN-NEXT: [[NEW:%.*]] = fsub float [[LOADED]], [[VALUE:%.*]]
; GCN-NEXT: [[TMP2:%.*]] = bitcast float* [[PTR]] to i32*
; GCN-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
; GCN-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
; GCN-NEXT: [[TMP5:%.*]] = cmpxchg i32* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst
; GCN-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
; GCN-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
; GCN-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
; GCN-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GCN: atomicrmw.end:
; GCN-NEXT: ret float [[TMP6]]
;
%res = atomicrmw fsub float* %ptr, float %value seq_cst
ret float %res
}
define float @test_atomicrmw_fsub_f32_global(float addrspace(1)* %ptr, float %value) {
; GCN-LABEL: @test_atomicrmw_fsub_f32_global(
; GCN-NEXT: [[TMP1:%.*]] = load float, float addrspace(1)* [[PTR:%.*]], align 4
; GCN-NEXT: br label [[ATOMICRMW_START:%.*]]
; GCN: atomicrmw.start:
; GCN-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; GCN-NEXT: [[NEW:%.*]] = fsub float [[LOADED]], [[VALUE:%.*]]
; GCN-NEXT: [[TMP2:%.*]] = bitcast float addrspace(1)* [[PTR]] to i32 addrspace(1)*
; GCN-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
; GCN-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
; GCN-NEXT: [[TMP5:%.*]] = cmpxchg i32 addrspace(1)* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst
; GCN-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
; GCN-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
; GCN-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
; GCN-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GCN: atomicrmw.end:
; GCN-NEXT: ret float [[TMP6]]
;
%res = atomicrmw fsub float addrspace(1)* %ptr, float %value seq_cst
ret float %res
}
define float @test_atomicrmw_fsub_f32_local(float addrspace(3)* %ptr, float %value) {
; GCN-LABEL: @test_atomicrmw_fsub_f32_local(
; GCN-NEXT: [[TMP1:%.*]] = load float, float addrspace(3)* [[PTR:%.*]], align 4
; GCN-NEXT: br label [[ATOMICRMW_START:%.*]]
; GCN: atomicrmw.start:
; GCN-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; GCN-NEXT: [[NEW:%.*]] = fsub float [[LOADED]], [[VALUE:%.*]]
; GCN-NEXT: [[TMP2:%.*]] = bitcast float addrspace(3)* [[PTR]] to i32 addrspace(3)*
; GCN-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
; GCN-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
; GCN-NEXT: [[TMP5:%.*]] = cmpxchg i32 addrspace(3)* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst
; GCN-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
; GCN-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
; GCN-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
; GCN-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GCN: atomicrmw.end:
; GCN-NEXT: ret float [[TMP6]]
;
%res = atomicrmw fsub float addrspace(3)* %ptr, float %value seq_cst
ret float %res
}
define half @test_atomicrmw_fsub_f16_flat(half* %ptr, half %value) {
; GCN-LABEL: @test_atomicrmw_fsub_f16_flat(
; GCN-NEXT: [[TMP1:%.*]] = load half, half* [[PTR:%.*]], align 2
; GCN-NEXT: br label [[ATOMICRMW_START:%.*]]
; GCN: atomicrmw.start:
; GCN-NEXT: [[LOADED:%.*]] = phi half [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; GCN-NEXT: [[NEW:%.*]] = fsub half [[LOADED]], [[VALUE:%.*]]
; GCN-NEXT: [[TMP2:%.*]] = bitcast half* [[PTR]] to i16*
; GCN-NEXT: [[TMP3:%.*]] = bitcast half [[NEW]] to i16
; GCN-NEXT: [[TMP4:%.*]] = bitcast half [[LOADED]] to i16
; GCN-NEXT: [[TMP5:%.*]] = cmpxchg i16* [[TMP2]], i16 [[TMP4]], i16 [[TMP3]] seq_cst seq_cst
; GCN-NEXT: [[SUCCESS:%.*]] = extractvalue { i16, i1 } [[TMP5]], 1
; GCN-NEXT: [[NEWLOADED:%.*]] = extractvalue { i16, i1 } [[TMP5]], 0
; GCN-NEXT: [[TMP6]] = bitcast i16 [[NEWLOADED]] to half
; GCN-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GCN: atomicrmw.end:
; GCN-NEXT: ret half [[TMP6]]
;
%res = atomicrmw fsub half* %ptr, half %value seq_cst
ret half %res
}
; seq_cst `atomicrmw fsub` on a half in addrspace(1). The expected expansion is
; an initial load followed by a cmpxchg retry loop that operates on the i16 bit
; pattern of the value (bitcasts on the pointer, the new and the loaded value).
define half @test_atomicrmw_fsub_f16_global(half addrspace(1)* %ptr, half %value) {
; GCN-LABEL: @test_atomicrmw_fsub_f16_global(
; GCN-NEXT: [[TMP1:%.*]] = load half, half addrspace(1)* [[PTR:%.*]], align 2
; GCN-NEXT: br label [[ATOMICRMW_START:%.*]]
; GCN: atomicrmw.start:
; GCN-NEXT: [[LOADED:%.*]] = phi half [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; GCN-NEXT: [[NEW:%.*]] = fsub half [[LOADED]], [[VALUE:%.*]]
; GCN-NEXT: [[TMP2:%.*]] = bitcast half addrspace(1)* [[PTR]] to i16 addrspace(1)*
; GCN-NEXT: [[TMP3:%.*]] = bitcast half [[NEW]] to i16
; GCN-NEXT: [[TMP4:%.*]] = bitcast half [[LOADED]] to i16
; GCN-NEXT: [[TMP5:%.*]] = cmpxchg i16 addrspace(1)* [[TMP2]], i16 [[TMP4]], i16 [[TMP3]] seq_cst seq_cst
; GCN-NEXT: [[SUCCESS:%.*]] = extractvalue { i16, i1 } [[TMP5]], 1
; GCN-NEXT: [[NEWLOADED:%.*]] = extractvalue { i16, i1 } [[TMP5]], 0
; GCN-NEXT: [[TMP6]] = bitcast i16 [[NEWLOADED]] to half
; GCN-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GCN: atomicrmw.end:
; GCN-NEXT: ret half [[TMP6]]
;
%res = atomicrmw fsub half addrspace(1)* %ptr, half %value seq_cst
ret half %res
}
; seq_cst `atomicrmw fsub` on a half in addrspace(3). Same expected shape as the
; other f16 cases: load, then a cmpxchg retry loop on the i16 bit pattern, with
; every pointer type carrying addrspace(3).
define half @test_atomicrmw_fsub_f16_local(half addrspace(3)* %ptr, half %value) {
; GCN-LABEL: @test_atomicrmw_fsub_f16_local(
; GCN-NEXT: [[TMP1:%.*]] = load half, half addrspace(3)* [[PTR:%.*]], align 2
; GCN-NEXT: br label [[ATOMICRMW_START:%.*]]
; GCN: atomicrmw.start:
; GCN-NEXT: [[LOADED:%.*]] = phi half [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; GCN-NEXT: [[NEW:%.*]] = fsub half [[LOADED]], [[VALUE:%.*]]
; GCN-NEXT: [[TMP2:%.*]] = bitcast half addrspace(3)* [[PTR]] to i16 addrspace(3)*
; GCN-NEXT: [[TMP3:%.*]] = bitcast half [[NEW]] to i16
; GCN-NEXT: [[TMP4:%.*]] = bitcast half [[LOADED]] to i16
; GCN-NEXT: [[TMP5:%.*]] = cmpxchg i16 addrspace(3)* [[TMP2]], i16 [[TMP4]], i16 [[TMP3]] seq_cst seq_cst
; GCN-NEXT: [[SUCCESS:%.*]] = extractvalue { i16, i1 } [[TMP5]], 1
; GCN-NEXT: [[NEWLOADED:%.*]] = extractvalue { i16, i1 } [[TMP5]], 0
; GCN-NEXT: [[TMP6]] = bitcast i16 [[NEWLOADED]] to half
; GCN-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GCN: atomicrmw.end:
; GCN-NEXT: ret half [[TMP6]]
;
%res = atomicrmw fsub half addrspace(3)* %ptr, half %value seq_cst
ret half %res
}
; seq_cst `atomicrmw fsub` on a double through a pointer with no explicit
; address space. Expected expansion: an 8-byte-aligned load followed by a
; cmpxchg retry loop on the i64 bit pattern of the value.
define double @test_atomicrmw_fsub_f64_flat(double* %ptr, double %value) {
; GCN-LABEL: @test_atomicrmw_fsub_f64_flat(
; GCN-NEXT: [[TMP1:%.*]] = load double, double* [[PTR:%.*]], align 8
; GCN-NEXT: br label [[ATOMICRMW_START:%.*]]
; GCN: atomicrmw.start:
; GCN-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; GCN-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE:%.*]]
; GCN-NEXT: [[TMP2:%.*]] = bitcast double* [[PTR]] to i64*
; GCN-NEXT: [[TMP3:%.*]] = bitcast double [[NEW]] to i64
; GCN-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
; GCN-NEXT: [[TMP5:%.*]] = cmpxchg i64* [[TMP2]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst
; GCN-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
; GCN-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
; GCN-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
; GCN-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GCN: atomicrmw.end:
; GCN-NEXT: ret double [[TMP6]]
;
%res = atomicrmw fsub double* %ptr, double %value seq_cst
ret double %res
}
; seq_cst `atomicrmw fsub` on a double in addrspace(1). Expected expansion: a
; load followed by a cmpxchg retry loop on the i64 bit pattern, with all
; pointer types carrying addrspace(1).
define double @test_atomicrmw_fsub_f64_global(double addrspace(1)* %ptr, double %value) {
; GCN-LABEL: @test_atomicrmw_fsub_f64_global(
; GCN-NEXT: [[TMP1:%.*]] = load double, double addrspace(1)* [[PTR:%.*]], align 8
; GCN-NEXT: br label [[ATOMICRMW_START:%.*]]
; GCN: atomicrmw.start:
; GCN-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; GCN-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE:%.*]]
; GCN-NEXT: [[TMP2:%.*]] = bitcast double addrspace(1)* [[PTR]] to i64 addrspace(1)*
; GCN-NEXT: [[TMP3:%.*]] = bitcast double [[NEW]] to i64
; GCN-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
; GCN-NEXT: [[TMP5:%.*]] = cmpxchg i64 addrspace(1)* [[TMP2]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst
; GCN-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
; GCN-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
; GCN-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
; GCN-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GCN: atomicrmw.end:
; GCN-NEXT: ret double [[TMP6]]
;
%res = atomicrmw fsub double addrspace(1)* %ptr, double %value seq_cst
ret double %res
}
; seq_cst `atomicrmw fsub` on a double in addrspace(3). Expected expansion: a
; load followed by a cmpxchg retry loop on the i64 bit pattern, with all
; pointer types carrying addrspace(3).
define double @test_atomicrmw_fsub_f64_local(double addrspace(3)* %ptr, double %value) {
; GCN-LABEL: @test_atomicrmw_fsub_f64_local(
; GCN-NEXT: [[TMP1:%.*]] = load double, double addrspace(3)* [[PTR:%.*]], align 8
; GCN-NEXT: br label [[ATOMICRMW_START:%.*]]
; GCN: atomicrmw.start:
; GCN-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; GCN-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE:%.*]]
; GCN-NEXT: [[TMP2:%.*]] = bitcast double addrspace(3)* [[PTR]] to i64 addrspace(3)*
; GCN-NEXT: [[TMP3:%.*]] = bitcast double [[NEW]] to i64
; GCN-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
; GCN-NEXT: [[TMP5:%.*]] = cmpxchg i64 addrspace(3)* [[TMP2]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst
; GCN-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
; GCN-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
; GCN-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
; GCN-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GCN: atomicrmw.end:
; GCN-NEXT: ret double [[TMP6]]
;
%res = atomicrmw fsub double addrspace(3)* %ptr, double %value seq_cst
ret double %res
}

View File

@ -0,0 +1,60 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -atomic-expand %s | FileCheck %s
; RUN: opt -mtriple=r600-mesa-mesa3d -S -atomic-expand %s | FileCheck %s
; seq_cst `atomicrmw nand` on an i32 through a pointer with no explicit address
; space. Expected expansion: a load followed by a cmpxchg retry loop whose new
; value is computed as `and` followed by `xor ..., -1`.
define i32 @test_atomicrmw_nand_i32_flat(i32* %ptr, i32 %value) {
; CHECK-LABEL: @test_atomicrmw_nand_i32_flat(
; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[PTR:%.*]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[LOADED]], [[VALUE:%.*]]
; CHECK-NEXT: [[NEW:%.*]] = xor i32 [[TMP2]], -1
; CHECK-NEXT: [[TMP3:%.*]] = cmpxchg i32* [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP3]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret i32 [[NEWLOADED]]
;
%res = atomicrmw nand i32* %ptr, i32 %value seq_cst
ret i32 %res
}
; seq_cst `atomicrmw nand` on an i32 in addrspace(1). Expected expansion: a
; load followed by a cmpxchg retry loop (and + xor -1), with the load and the
; cmpxchg both using the addrspace(1) pointer.
define i32 @test_atomicrmw_nand_i32_global(i32 addrspace(1)* %ptr, i32 %value) {
; CHECK-LABEL: @test_atomicrmw_nand_i32_global(
; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(1)* [[PTR:%.*]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[LOADED]], [[VALUE:%.*]]
; CHECK-NEXT: [[NEW:%.*]] = xor i32 [[TMP2]], -1
; CHECK-NEXT: [[TMP3:%.*]] = cmpxchg i32 addrspace(1)* [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP3]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret i32 [[NEWLOADED]]
;
%res = atomicrmw nand i32 addrspace(1)* %ptr, i32 %value seq_cst
ret i32 %res
}
; seq_cst `atomicrmw nand` on an i32 in addrspace(3). Expected expansion: a
; load followed by a cmpxchg retry loop (and + xor -1), with the load and the
; cmpxchg both using the addrspace(3) pointer.
define i32 @test_atomicrmw_nand_i32_local(i32 addrspace(3)* %ptr, i32 %value) {
; CHECK-LABEL: @test_atomicrmw_nand_i32_local(
; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(3)* [[PTR:%.*]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[LOADED]], [[VALUE:%.*]]
; CHECK-NEXT: [[NEW:%.*]] = xor i32 [[TMP2]], -1
; CHECK-NEXT: [[TMP3:%.*]] = cmpxchg i32 addrspace(3)* [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP3]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret i32 [[NEWLOADED]]
;
%res = atomicrmw nand i32 addrspace(3)* %ptr, i32 %value seq_cst
ret i32 %res
}

View File

@ -0,0 +1,2 @@
# Mark every test in this directory as unsupported when the AMDGPU target is
# not among the targets listed in config.root.targets.
if 'AMDGPU' not in config.root.targets:
    config.unsupported = True

View File

@ -0,0 +1,440 @@
; RUN: opt -S -o - -mtriple=armv7-apple-ios7.0 -atomic-expand -codegen-opt-level=1 %s | FileCheck %s
; Monotonic `atomicrmw xchg` on i8: expects an ldrex/strex retry loop with the
; stored value zero-extended to i32, and no dmb barrier before or after the loop.
define i8 @test_atomic_xchg_i8(i8* %ptr, i8 %xchgend) {
; CHECK-LABEL: @test_atomic_xchg_i8
; CHECK-NOT: dmb
; CHECK: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8
; CHECK: [[NEWVAL32:%.*]] = zext i8 %xchgend to i32
; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
; CHECK: [[END]]:
; CHECK-NOT: dmb
; CHECK: ret i8 [[OLDVAL]]
%res = atomicrmw xchg i8* %ptr, i8 %xchgend monotonic
ret i8 %res
}
; seq_cst `atomicrmw add` on i16: expects an ldrex/strex retry loop performing
; the add on the truncated i16, bracketed by `dmb(i32 11)` calls before the loop
; and after it exits.
define i16 @test_atomic_add_i16(i16* %ptr, i16 %addend) {
; CHECK-LABEL: @test_atomic_add_i16
; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i16(i16* %ptr)
; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i16
; CHECK: [[NEWVAL:%.*]] = add i16 [[OLDVAL]], %addend
; CHECK: [[NEWVAL32:%.*]] = zext i16 [[NEWVAL]] to i32
; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i16(i32 [[NEWVAL32]], i16* %ptr)
; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
; CHECK: [[END]]:
; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: ret i16 [[OLDVAL]]
%res = atomicrmw add i16* %ptr, i16 %addend seq_cst
ret i16 %res
}
; Acquire `atomicrmw sub` on i32: expects no dmb before the ldrex/strex retry
; loop and a `dmb(i32 11)` after the loop exits. No trunc/zext is expected since
; the value is already 32 bits wide.
define i32 @test_atomic_sub_i32(i32* %ptr, i32 %subend) {
; CHECK-LABEL: @test_atomic_sub_i32
; CHECK-NOT: dmb
; CHECK: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK: [[OLDVAL:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %ptr)
; CHECK: [[NEWVAL:%.*]] = sub i32 [[OLDVAL]], %subend
; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 [[NEWVAL]], i32* %ptr)
; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
; CHECK: [[END]]:
; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: ret i32 [[OLDVAL]]
%res = atomicrmw sub i32* %ptr, i32 %subend acquire
ret i32 %res
}
; Release `atomicrmw and` on i8: expects a `dmb(i32 11)` before the ldrex/strex
; retry loop and no dmb after the loop exits.
define i8 @test_atomic_and_i8(i8* %ptr, i8 %andend) {
; CHECK-LABEL: @test_atomic_and_i8
; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8
; CHECK: [[NEWVAL:%.*]] = and i8 [[OLDVAL]], %andend
; CHECK: [[NEWVAL32:%.*]] = zext i8 [[NEWVAL]] to i32
; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
; CHECK: [[END]]:
; CHECK-NOT: dmb
; CHECK: ret i8 [[OLDVAL]]
%res = atomicrmw and i8* %ptr, i8 %andend release
ret i8 %res
}
; seq_cst `atomicrmw nand` on i16: expects an ldrex/strex retry loop that
; computes the new value as `and` followed by `xor ..., -1`, bracketed by
; `dmb(i32 11)` calls before and after the loop.
define i16 @test_atomic_nand_i16(i16* %ptr, i16 %nandend) {
; CHECK-LABEL: @test_atomic_nand_i16
; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i16(i16* %ptr)
; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i16
; CHECK: [[NEWVAL_TMP:%.*]] = and i16 [[OLDVAL]], %nandend
; CHECK: [[NEWVAL:%.*]] = xor i16 [[NEWVAL_TMP]], -1
; CHECK: [[NEWVAL32:%.*]] = zext i16 [[NEWVAL]] to i32
; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i16(i32 [[NEWVAL32]], i16* %ptr)
; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
; CHECK: [[END]]:
; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: ret i16 [[OLDVAL]]
%res = atomicrmw nand i16* %ptr, i16 %nandend seq_cst
ret i16 %res
}
; seq_cst `atomicrmw or` on i64: expects an ldrexd/strexd retry loop in which
; the { i32, i32 } result of ldrexd is reassembled into an i64 (zext/shl/or) and
; the new value is split back into low/high i32 halves (trunc/lshr) for strexd.
; The pointer is bitcast to i8* separately for the load and for the store, so
; the PTR8 pattern variable is intentionally captured twice.
define i64 @test_atomic_or_i64(i64* %ptr, i64 %orend) {
; CHECK-LABEL: @test_atomic_or_i64
; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8*
; CHECK: [[LOHI:%.*]] = call { i32, i32 } @llvm.arm.ldrexd(i8* [[PTR8]])
; CHECK: [[LO:%.*]] = extractvalue { i32, i32 } [[LOHI]], 0
; CHECK: [[HI:%.*]] = extractvalue { i32, i32 } [[LOHI]], 1
; CHECK: [[LO64:%.*]] = zext i32 [[LO]] to i64
; CHECK: [[HI64_TMP:%.*]] = zext i32 [[HI]] to i64
; CHECK: [[HI64:%.*]] = shl i64 [[HI64_TMP]], 32
; CHECK: [[OLDVAL:%.*]] = or i64 [[LO64]], [[HI64]]
; CHECK: [[NEWVAL:%.*]] = or i64 [[OLDVAL]], %orend
; CHECK: [[NEWLO:%.*]] = trunc i64 [[NEWVAL]] to i32
; CHECK: [[NEWHI_TMP:%.*]] = lshr i64 [[NEWVAL]], 32
; CHECK: [[NEWHI:%.*]] = trunc i64 [[NEWHI_TMP]] to i32
; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8*
; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strexd(i32 [[NEWLO]], i32 [[NEWHI]], i8* [[PTR8]])
; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
; CHECK: [[END]]:
; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: ret i64 [[OLDVAL]]
%res = atomicrmw or i64* %ptr, i64 %orend seq_cst
ret i64 %res
}
; seq_cst `atomicrmw xor` on i8: expects an ldrex/strex retry loop performing
; the xor on the truncated i8, bracketed by `dmb(i32 11)` calls before and after.
define i8 @test_atomic_xor_i8(i8* %ptr, i8 %xorend) {
; CHECK-LABEL: @test_atomic_xor_i8
; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8
; CHECK: [[NEWVAL:%.*]] = xor i8 [[OLDVAL]], %xorend
; CHECK: [[NEWVAL32:%.*]] = zext i8 [[NEWVAL]] to i32
; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
; CHECK: [[END]]:
; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: ret i8 [[OLDVAL]]
%res = atomicrmw xor i8* %ptr, i8 %xorend seq_cst
ret i8 %res
}
; seq_cst `atomicrmw max` on i8: expects the new value to be chosen with a
; signed `icmp sgt` + `select` (old value kept when it is greater) inside an
; ldrex/strex retry loop bracketed by `dmb(i32 11)` calls.
define i8 @test_atomic_max_i8(i8* %ptr, i8 %maxend) {
; CHECK-LABEL: @test_atomic_max_i8
; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8
; CHECK: [[WANT_OLD:%.*]] = icmp sgt i8 [[OLDVAL]], %maxend
; CHECK: [[NEWVAL:%.*]] = select i1 [[WANT_OLD]], i8 [[OLDVAL]], i8 %maxend
; CHECK: [[NEWVAL32:%.*]] = zext i8 [[NEWVAL]] to i32
; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
; CHECK: [[END]]:
; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: ret i8 [[OLDVAL]]
%res = atomicrmw max i8* %ptr, i8 %maxend seq_cst
ret i8 %res
}
; seq_cst `atomicrmw min` on i8: expects the new value to be chosen with a
; signed `icmp sle` + `select` (old value kept when it is less or equal) inside
; an ldrex/strex retry loop bracketed by `dmb(i32 11)` calls.
define i8 @test_atomic_min_i8(i8* %ptr, i8 %minend) {
; CHECK-LABEL: @test_atomic_min_i8
; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8
; CHECK: [[WANT_OLD:%.*]] = icmp sle i8 [[OLDVAL]], %minend
; CHECK: [[NEWVAL:%.*]] = select i1 [[WANT_OLD]], i8 [[OLDVAL]], i8 %minend
; CHECK: [[NEWVAL32:%.*]] = zext i8 [[NEWVAL]] to i32
; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
; CHECK: [[END]]:
; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: ret i8 [[OLDVAL]]
%res = atomicrmw min i8* %ptr, i8 %minend seq_cst
ret i8 %res
}
; seq_cst `atomicrmw umax` on i8: expects the new value to be chosen with an
; unsigned `icmp ugt` + `select` (old value kept when it is greater) inside an
; ldrex/strex retry loop bracketed by `dmb(i32 11)` calls.
define i8 @test_atomic_umax_i8(i8* %ptr, i8 %umaxend) {
; CHECK-LABEL: @test_atomic_umax_i8
; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8
; CHECK: [[WANT_OLD:%.*]] = icmp ugt i8 [[OLDVAL]], %umaxend
; CHECK: [[NEWVAL:%.*]] = select i1 [[WANT_OLD]], i8 [[OLDVAL]], i8 %umaxend
; CHECK: [[NEWVAL32:%.*]] = zext i8 [[NEWVAL]] to i32
; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
; CHECK: [[END]]:
; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: ret i8 [[OLDVAL]]
%res = atomicrmw umax i8* %ptr, i8 %umaxend seq_cst
ret i8 %res
}
; seq_cst `atomicrmw umin` on i8: expects the new value to be chosen with an
; unsigned `icmp ule` + `select` (old value kept when it is less or equal)
; inside an ldrex/strex retry loop bracketed by `dmb(i32 11)` calls.
define i8 @test_atomic_umin_i8(i8* %ptr, i8 %uminend) {
; CHECK-LABEL: @test_atomic_umin_i8
; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8
; CHECK: [[WANT_OLD:%.*]] = icmp ule i8 [[OLDVAL]], %uminend
; CHECK: [[NEWVAL:%.*]] = select i1 [[WANT_OLD]], i8 [[OLDVAL]], i8 %uminend
; CHECK: [[NEWVAL32:%.*]] = zext i8 [[NEWVAL]] to i32
; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
; CHECK: [[END]]:
; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: ret i8 [[OLDVAL]]
%res = atomicrmw umin i8* %ptr, i8 %uminend seq_cst
ret i8 %res
}
; seq_cst/seq_cst `cmpxchg` on i8. Expected block structure:
;   start         - ldrex + compare against %desired
;   fenced store  - dmb, then falls into the store loop
;   store loop    - phi of the loaded value, strex of %newval
;   released load - on strex failure: ldrex again and re-compare
;   success       - dmb
;   no-store      - phi of the loaded value, clrex
;   failure       - dmb
;   done          - phis for the success flag and the loaded value
define i8 @test_cmpxchg_i8_seqcst_seqcst(i8* %ptr, i8 %desired, i8 %newval) {
; CHECK-LABEL: @test_cmpxchg_i8_seqcst_seqcst
; CHECK: br label %[[START:.*]]
; CHECK: [[START]]:
; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8
; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i8 [[OLDVAL]], %desired
; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:.*]]
; CHECK: [[FENCED_STORE]]:
; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK: [[LOADED_LOOP:%.*]] = phi i8 [ [[OLDVAL]], %[[FENCED_STORE]] ], [ [[OLDVAL_LOOP:%.*]], %[[RELEASED_LOAD:.*]] ]
; CHECK: [[NEWVAL32:%.*]] = zext i8 %newval to i32
; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[RELEASED_LOAD]]
; CHECK: [[RELEASED_LOAD]]:
; CHECK: [[OLDVAL32_LOOP:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
; CHECK: [[OLDVAL_LOOP]] = trunc i32 [[OLDVAL32_LOOP]] to i8
; CHECK: [[SHOULD_STORE_LOOP:%.*]] = icmp eq i8 [[OLDVAL_LOOP]], %desired
; CHECK: br i1 [[SHOULD_STORE_LOOP]], label %[[LOOP]], label %[[NO_STORE_BB]]
; CHECK: [[SUCCESS_BB]]:
; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: br label %[[DONE:.*]]
; CHECK: [[NO_STORE_BB]]:
; CHECK-NEXT: [[LOADED_NO_STORE:%.*]] = phi i8 [ [[OLDVAL]], %[[START]] ], [ [[OLDVAL_LOOP]], %[[RELEASED_LOAD]] ]
; CHECK-NEXT: call void @llvm.arm.clrex()
; CHECK-NEXT: br label %[[FAILURE_BB:.*]]
; CHECK: [[FAILURE_BB]]:
; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: br label %[[DONE]]
; CHECK: [[DONE]]:
; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
; CHECK: [[LOADED:%.*]] = phi i8 [ [[LOADED_LOOP]], %[[SUCCESS_BB]] ], [ [[LOADED_NO_STORE]], %[[FAILURE_BB]] ]
; CHECK: ret i8 [[LOADED]]
%pairold = cmpxchg i8* %ptr, i8 %desired, i8 %newval seq_cst seq_cst
%old = extractvalue { i8, i1 } %pairold, 0
ret i8 %old
}
; seq_cst/monotonic `cmpxchg` on i16. Same block structure as the i8
; seq_cst/seq_cst case (start, fenced store, store loop, released load,
; success, no-store, failure, done), except that the failure block must not
; contain a dmb because the failure ordering is monotonic.
;
; Every pattern variable used below is captured inside this function: the entry
; block's successor is START (distinct from the inner store LOOP), the trunc
; operand is tied to the ldrex result instead of a hard-coded value number, and
; RELEASED_LOAD is captured once and then reused.
define i16 @test_cmpxchg_i16_seqcst_monotonic(i16* %ptr, i16 %desired, i16 %newval) {
; CHECK-LABEL: @test_cmpxchg_i16_seqcst_monotonic
; CHECK: br label %[[START:.*]]
; CHECK: [[START]]:
; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i16(i16* %ptr)
; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i16
; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i16 [[OLDVAL]], %desired
; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:.*]]
; CHECK: [[FENCED_STORE]]:
; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK: [[LOADED_LOOP:%.*]] = phi i16 [ [[OLDVAL]], %[[FENCED_STORE]] ], [ [[OLDVAL_LOOP:%.*]], %[[RELEASED_LOAD:.*]] ]
; CHECK: [[NEWVAL32:%.*]] = zext i16 %newval to i32
; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i16(i32 [[NEWVAL32]], i16* %ptr)
; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[RELEASED_LOAD]]
; CHECK: [[RELEASED_LOAD]]:
; CHECK: [[OLDVAL32_LOOP:%.*]] = call i32 @llvm.arm.ldrex.p0i16(i16* %ptr)
; CHECK: [[OLDVAL_LOOP]] = trunc i32 [[OLDVAL32_LOOP]] to i16
; CHECK: [[SHOULD_STORE_LOOP:%.*]] = icmp eq i16 [[OLDVAL_LOOP]], %desired
; CHECK: br i1 [[SHOULD_STORE_LOOP]], label %[[LOOP]], label %[[NO_STORE_BB]]
; CHECK: [[SUCCESS_BB]]:
; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: br label %[[DONE:.*]]
; CHECK: [[NO_STORE_BB]]:
; CHECK-NEXT: [[LOADED_NO_STORE:%.*]] = phi i16 [ [[OLDVAL]], %[[START]] ], [ [[OLDVAL_LOOP]], %[[RELEASED_LOAD]] ]
; CHECK-NEXT: call void @llvm.arm.clrex()
; CHECK-NEXT: br label %[[FAILURE_BB:.*]]
; CHECK: [[FAILURE_BB]]:
; CHECK-NOT: dmb
; CHECK: br label %[[DONE]]
; CHECK: [[DONE]]:
; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
; CHECK: [[LOADED:%.*]] = phi i16 [ [[LOADED_LOOP]], %[[SUCCESS_BB]] ], [ [[LOADED_NO_STORE]], %[[FAILURE_BB]] ]
; CHECK: ret i16 [[LOADED]]
%pairold = cmpxchg i16* %ptr, i16 %desired, i16 %newval seq_cst monotonic
%old = extractvalue { i16, i1 } %pairold, 0
ret i16 %old
}
; acquire/acquire `cmpxchg` on i32: expects no dmb before the ldrex/strex loop,
; a `dmb(i32 11)` in both the success and the failure block, and a clrex in the
; no-store block. The returned value is the one read by ldrex.
define i32 @test_cmpxchg_i32_acquire_acquire(i32* %ptr, i32 %desired, i32 %newval) {
; CHECK-LABEL: @test_cmpxchg_i32_acquire_acquire
; CHECK-NOT: dmb
; CHECK: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK: [[OLDVAL:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %ptr)
; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[OLDVAL]], %desired
; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[NO_STORE_BB:.*]]
; CHECK: [[TRY_STORE]]:
; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %newval, i32* %ptr)
; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
; CHECK: [[SUCCESS_BB]]:
; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: br label %[[DONE:.*]]
; CHECK: [[NO_STORE_BB]]:
; CHECK-NEXT: call void @llvm.arm.clrex()
; CHECK-NEXT: br label %[[FAILURE_BB:.*]]
; CHECK: [[FAILURE_BB]]:
; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: br label %[[DONE]]
; CHECK: [[DONE]]:
; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
; CHECK: ret i32 [[OLDVAL]]
%pairold = cmpxchg i32* %ptr, i32 %desired, i32 %newval acquire acquire
%old = extractvalue { i32, i1 } %pairold, 0
ret i32 %old
}
; monotonic/monotonic `cmpxchg` on i64: expects an ldrexd/strexd loop with the
; 64-bit value assembled from / split into two i32 halves, a clrex in the
; no-store block, and no dmb before the loop or in the success/failure blocks.
; The pointer is bitcast to i8* separately for the load and the store, so the
; PTR8 pattern variable is intentionally captured twice.
define i64 @test_cmpxchg_i64_monotonic_monotonic(i64* %ptr, i64 %desired, i64 %newval) {
; CHECK-LABEL: @test_cmpxchg_i64_monotonic_monotonic
; CHECK-NOT: dmb
; CHECK: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8*
; CHECK: [[LOHI:%.*]] = call { i32, i32 } @llvm.arm.ldrexd(i8* [[PTR8]])
; CHECK: [[LO:%.*]] = extractvalue { i32, i32 } [[LOHI]], 0
; CHECK: [[HI:%.*]] = extractvalue { i32, i32 } [[LOHI]], 1
; CHECK: [[LO64:%.*]] = zext i32 [[LO]] to i64
; CHECK: [[HI64_TMP:%.*]] = zext i32 [[HI]] to i64
; CHECK: [[HI64:%.*]] = shl i64 [[HI64_TMP]], 32
; CHECK: [[OLDVAL:%.*]] = or i64 [[LO64]], [[HI64]]
; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i64 [[OLDVAL]], %desired
; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[NO_STORE_BB:.*]]
; CHECK: [[TRY_STORE]]:
; CHECK: [[NEWLO:%.*]] = trunc i64 %newval to i32
; CHECK: [[NEWHI_TMP:%.*]] = lshr i64 %newval, 32
; CHECK: [[NEWHI:%.*]] = trunc i64 [[NEWHI_TMP]] to i32
; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8*
; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strexd(i32 [[NEWLO]], i32 [[NEWHI]], i8* [[PTR8]])
; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
; CHECK: [[SUCCESS_BB]]:
; CHECK-NOT: dmb
; CHECK: br label %[[DONE:.*]]
; CHECK: [[NO_STORE_BB]]:
; CHECK-NEXT: call void @llvm.arm.clrex()
; CHECK-NEXT: br label %[[FAILURE_BB:.*]]
; CHECK: [[FAILURE_BB]]:
; CHECK-NOT: dmb
; CHECK: br label %[[DONE]]
; CHECK: [[DONE]]:
; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
; CHECK: ret i64 [[OLDVAL]]
%pairold = cmpxchg i64* %ptr, i64 %desired, i64 %newval monotonic monotonic
%old = extractvalue { i64, i1 } %pairold, 0
ret i64 %old
}
; seq_cst/seq_cst `cmpxchg` on i32 in a `minsize` function: expects a single
; leading `dmb(i32 11)` ahead of the ldrex/strex loop (no separate fenced-store
; or released-load blocks), with the strex failure edge going straight back to
; the start block, plus a dmb in both the success and the failure block.
;
; FAILURE_BB is captured at the branch in the no-store block so that this
; function does not depend on a pattern variable left over from a previous one.
define i32 @test_cmpxchg_minsize(i32* %addr, i32 %desired, i32 %new) minsize {
; CHECK-LABEL: @test_cmpxchg_minsize
; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: br label %[[START:.*]]
; CHECK: [[START]]:
; CHECK: [[LOADED:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %addr)
; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[LOADED]], %desired
; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[NO_STORE_BB:.*]]
; CHECK: [[TRY_STORE]]:
; CHECK: [[STREX:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %new, i32* %addr)
; CHECK: [[SUCCESS:%.*]] = icmp eq i32 [[STREX]], 0
; CHECK: br i1 [[SUCCESS]], label %[[SUCCESS_BB:.*]], label %[[START]]
; CHECK: [[SUCCESS_BB]]:
; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: br label %[[END:.*]]
; CHECK: [[NO_STORE_BB]]:
; CHECK: call void @llvm.arm.clrex()
; CHECK: br label %[[FAILURE_BB:.*]]
; CHECK: [[FAILURE_BB]]:
; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: br label %[[END]]
; CHECK: [[END]]:
; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
; CHECK: ret i32 [[LOADED]]
%pair = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst seq_cst
%oldval = extractvalue { i32, i1 } %pair, 0
ret i32 %oldval
}

View File

@ -0,0 +1,242 @@
; RUN: opt -S -o - -mtriple=armv8-linux-gnueabihf -atomic-expand %s -codegen-opt-level=1 | FileCheck %s
; Monotonic `atomicrmw xchg` on i8 (armv8 triple): expects a plain ldrex/strex
; retry loop and no fence before or after it.
define i8 @test_atomic_xchg_i8(i8* %ptr, i8 %xchgend) {
; CHECK-LABEL: @test_atomic_xchg_i8
; CHECK-NOT: fence
; CHECK: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8
; CHECK: [[NEWVAL32:%.*]] = zext i8 %xchgend to i32
; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
; CHECK: [[END]]:
; CHECK-NOT: fence
; CHECK: ret i8 [[OLDVAL]]
%res = atomicrmw xchg i8* %ptr, i8 %xchgend monotonic
ret i8 %res
}
; seq_cst `atomicrmw add` on i16 (armv8 triple): expects the ordering to be
; carried by the ldaex/stlex intrinsics themselves, with no fence before or
; after the retry loop.
define i16 @test_atomic_add_i16(i16* %ptr, i16 %addend) {
; CHECK-LABEL: @test_atomic_add_i16
; CHECK-NOT: fence
; CHECK: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldaex.p0i16(i16* %ptr)
; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i16
; CHECK: [[NEWVAL:%.*]] = add i16 [[OLDVAL]], %addend
; CHECK: [[NEWVAL32:%.*]] = zext i16 [[NEWVAL]] to i32
; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.stlex.p0i16(i32 [[NEWVAL32]], i16* %ptr)
; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
; CHECK: [[END]]:
; CHECK-NOT: fence
; CHECK: ret i16 [[OLDVAL]]
%res = atomicrmw add i16* %ptr, i16 %addend seq_cst
ret i16 %res
}
; Acquire `atomicrmw sub` on i32 (armv8 triple): expects an ldaex load paired
; with a plain strex store, and no fence before or after the retry loop.
define i32 @test_atomic_sub_i32(i32* %ptr, i32 %subend) {
; CHECK-LABEL: @test_atomic_sub_i32
; CHECK-NOT: fence
; CHECK: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK: [[OLDVAL:%.*]] = call i32 @llvm.arm.ldaex.p0i32(i32* %ptr)
; CHECK: [[NEWVAL:%.*]] = sub i32 [[OLDVAL]], %subend
; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 [[NEWVAL]], i32* %ptr)
; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
; CHECK: [[END]]:
; CHECK-NOT: fence
; CHECK: ret i32 [[OLDVAL]]
%res = atomicrmw sub i32* %ptr, i32 %subend acquire
ret i32 %res
}
; seq_cst `atomicrmw or` on i64 (armv8 triple): expects an ldaexd/stlexd retry
; loop with the 64-bit value assembled from / split into two i32 halves, and no
; fence before or after the loop. The pointer is bitcast to i8* separately for
; the load and the store, so the PTR8 pattern variable is captured twice.
define i64 @test_atomic_or_i64(i64* %ptr, i64 %orend) {
; CHECK-LABEL: @test_atomic_or_i64
; CHECK-NOT: fence
; CHECK: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8*
; CHECK: [[LOHI:%.*]] = call { i32, i32 } @llvm.arm.ldaexd(i8* [[PTR8]])
; CHECK: [[LO:%.*]] = extractvalue { i32, i32 } [[LOHI]], 0
; CHECK: [[HI:%.*]] = extractvalue { i32, i32 } [[LOHI]], 1
; CHECK: [[LO64:%.*]] = zext i32 [[LO]] to i64
; CHECK: [[HI64_TMP:%.*]] = zext i32 [[HI]] to i64
; CHECK: [[HI64:%.*]] = shl i64 [[HI64_TMP]], 32
; CHECK: [[OLDVAL:%.*]] = or i64 [[LO64]], [[HI64]]
; CHECK: [[NEWVAL:%.*]] = or i64 [[OLDVAL]], %orend
; CHECK: [[NEWLO:%.*]] = trunc i64 [[NEWVAL]] to i32
; CHECK: [[NEWHI_TMP:%.*]] = lshr i64 [[NEWVAL]], 32
; CHECK: [[NEWHI:%.*]] = trunc i64 [[NEWHI_TMP]] to i32
; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8*
; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.stlexd(i32 [[NEWLO]], i32 [[NEWHI]], i8* [[PTR8]])
; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
; CHECK: [[END]]:
; CHECK-NOT: fence
; CHECK: ret i64 [[OLDVAL]]
%res = atomicrmw or i64* %ptr, i64 %orend seq_cst
ret i64 %res
}
; seq_cst/seq_cst `cmpxchg` on i8 (armv8 triple): expects an ldaex/stlex loop
; with a clrex in the no-store block and no fence anywhere, including the
; success and failure blocks.
;
; The trunc operand is tied to the ldaex result rather than a hard-coded value
; number, and the negative patterns use `fence` (as the neighbouring tests do)
; so that they can actually fire if a fence instruction is emitted.
define i8 @test_cmpxchg_i8_seqcst_seqcst(i8* %ptr, i8 %desired, i8 %newval) {
; CHECK-LABEL: @test_cmpxchg_i8_seqcst_seqcst
; CHECK-NOT: fence
; CHECK: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldaex.p0i8(i8* %ptr)
; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8
; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i8 [[OLDVAL]], %desired
; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[NO_STORE_BB:.*]]
; CHECK: [[TRY_STORE]]:
; CHECK: [[NEWVAL32:%.*]] = zext i8 %newval to i32
; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.stlex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
; CHECK: [[SUCCESS_BB]]:
; CHECK-NOT: fence
; CHECK: br label %[[DONE:.*]]
; CHECK: [[NO_STORE_BB]]:
; CHECK-NEXT: call void @llvm.arm.clrex()
; CHECK-NEXT: br label %[[FAILURE_BB:.*]]
; CHECK: [[FAILURE_BB]]:
; CHECK-NOT: fence
; CHECK: br label %[[DONE]]
; CHECK: [[DONE]]:
; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
; CHECK: ret i8 [[OLDVAL]]
%pairold = cmpxchg i8* %ptr, i8 %desired, i8 %newval seq_cst seq_cst
%old = extractvalue { i8, i1 } %pairold, 0
ret i8 %old
}
; seq_cst/monotonic `cmpxchg` on i16 (armv8 triple): expects an ldaex/stlex
; loop with a clrex in the no-store block and no fence anywhere.
;
; The trunc operand is tied to the ldaex result via OLDVAL32 rather than a
; hard-coded value number, so the test does not depend on how unnamed values
; happen to be numbered.
define i16 @test_cmpxchg_i16_seqcst_monotonic(i16* %ptr, i16 %desired, i16 %newval) {
; CHECK-LABEL: @test_cmpxchg_i16_seqcst_monotonic
; CHECK-NOT: fence
; CHECK: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldaex.p0i16(i16* %ptr)
; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i16
; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i16 [[OLDVAL]], %desired
; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[NO_STORE_BB:.*]]
; CHECK: [[TRY_STORE]]:
; CHECK: [[NEWVAL32:%.*]] = zext i16 %newval to i32
; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.stlex.p0i16(i32 [[NEWVAL32]], i16* %ptr)
; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
; CHECK: [[SUCCESS_BB]]:
; CHECK-NOT: fence
; CHECK: br label %[[DONE:.*]]
; CHECK: [[NO_STORE_BB]]:
; CHECK-NEXT: call void @llvm.arm.clrex()
; CHECK-NEXT: br label %[[FAILURE_BB:.*]]
; CHECK: [[FAILURE_BB]]:
; CHECK-NOT: fence
; CHECK: br label %[[DONE]]
; CHECK: [[DONE]]:
; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
; CHECK: ret i16 [[OLDVAL]]
%pairold = cmpxchg i16* %ptr, i16 %desired, i16 %newval seq_cst monotonic
%old = extractvalue { i16, i1 } %pairold, 0
ret i16 %old
}
; acquire/acquire `cmpxchg` on i32 (armv8 triple): expects an ldaex load paired
; with a plain strex store, a clrex in the no-store block, and no fence
; anywhere.
;
; The negative patterns in the success and failure blocks use `fence`, matching
; the other tests in this file, so that they can actually fire if a fence
; instruction is emitted there.
define i32 @test_cmpxchg_i32_acquire_acquire(i32* %ptr, i32 %desired, i32 %newval) {
; CHECK-LABEL: @test_cmpxchg_i32_acquire_acquire
; CHECK-NOT: fence
; CHECK: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK: [[OLDVAL:%.*]] = call i32 @llvm.arm.ldaex.p0i32(i32* %ptr)
; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[OLDVAL]], %desired
; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[NO_STORE_BB:.*]]
; CHECK: [[TRY_STORE]]:
; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %newval, i32* %ptr)
; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
; CHECK: [[SUCCESS_BB]]:
; CHECK-NOT: fence
; CHECK: br label %[[DONE:.*]]
; CHECK: [[NO_STORE_BB]]:
; CHECK-NEXT: call void @llvm.arm.clrex()
; CHECK-NEXT: br label %[[FAILURE_BB:.*]]
; CHECK: [[FAILURE_BB]]:
; CHECK-NOT: fence
; CHECK: br label %[[DONE]]
; CHECK: [[DONE]]:
; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
; CHECK: ret i32 [[OLDVAL]]
%pairold = cmpxchg i32* %ptr, i32 %desired, i32 %newval acquire acquire
%old = extractvalue { i32, i1 } %pairold, 0
ret i32 %old
}
; monotonic/monotonic `cmpxchg` on i64 (armv8 triple): expects a plain
; ldrexd/strexd loop with the 64-bit value assembled from / split into two i32
; halves, a clrex in the no-store block, and no fence anywhere. The pointer is
; bitcast to i8* separately for the load and the store, so the PTR8 pattern
; variable is intentionally captured twice.
;
; The negative patterns in the success and failure blocks use `fence`, matching
; the other tests in this file, so that they can actually fire if a fence
; instruction is emitted there.
define i64 @test_cmpxchg_i64_monotonic_monotonic(i64* %ptr, i64 %desired, i64 %newval) {
; CHECK-LABEL: @test_cmpxchg_i64_monotonic_monotonic
; CHECK-NOT: fence
; CHECK: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8*
; CHECK: [[LOHI:%.*]] = call { i32, i32 } @llvm.arm.ldrexd(i8* [[PTR8]])
; CHECK: [[LO:%.*]] = extractvalue { i32, i32 } [[LOHI]], 0
; CHECK: [[HI:%.*]] = extractvalue { i32, i32 } [[LOHI]], 1
; CHECK: [[LO64:%.*]] = zext i32 [[LO]] to i64
; CHECK: [[HI64_TMP:%.*]] = zext i32 [[HI]] to i64
; CHECK: [[HI64:%.*]] = shl i64 [[HI64_TMP]], 32
; CHECK: [[OLDVAL:%.*]] = or i64 [[LO64]], [[HI64]]
; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i64 [[OLDVAL]], %desired
; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[NO_STORE_BB:.*]]
; CHECK: [[TRY_STORE]]:
; CHECK: [[NEWLO:%.*]] = trunc i64 %newval to i32
; CHECK: [[NEWHI_TMP:%.*]] = lshr i64 %newval, 32
; CHECK: [[NEWHI:%.*]] = trunc i64 [[NEWHI_TMP]] to i32
; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8*
; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strexd(i32 [[NEWLO]], i32 [[NEWHI]], i8* [[PTR8]])
; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
; CHECK: [[SUCCESS_BB]]:
; CHECK-NOT: fence
; CHECK: br label %[[DONE:.*]]
; CHECK: [[NO_STORE_BB]]:
; CHECK-NEXT: call void @llvm.arm.clrex()
; CHECK-NEXT: br label %[[FAILURE_BB:.*]]
; CHECK: [[FAILURE_BB]]:
; CHECK-NOT: fence
; CHECK: br label %[[DONE]]
; CHECK: [[DONE]]:
; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
; CHECK: ret i64 [[OLDVAL]]
%pairold = cmpxchg i64* %ptr, i64 %desired, i64 %newval monotonic monotonic
%old = extractvalue { i64, i1 } %pairold, 0
ret i64 %old
}

View File

@ -0,0 +1,51 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -mtriple=armv7-apple-ios7.0 -atomic-expand %s | FileCheck %s
; A seq_cst atomicrmw fadd on float is expected to be rewritten as a
; compare-exchange retry loop: the current value is loaded once up front, and
; each iteration computes the fadd, bitcasts the pointer and both float values
; to i32, and issues a monotonic i32 cmpxchg, looping until the cmpxchg
; reports success. The expected output has a call to @llvm.arm.dmb(i32 11)
; before the initial load and another in the exit block before the return.
; The assertions below are autogenerated; regenerate them with
; utils/update_test_checks.py rather than editing them by hand.
define float @test_atomicrmw_fadd_f32(float* %ptr, float %value) {
; CHECK-LABEL: @test_atomicrmw_fadd_f32(
; CHECK-NEXT: call void @llvm.arm.dmb(i32 11)
; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[PTR:%.*]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[PTR]] to i32*
; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
; CHECK-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg i32* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] monotonic monotonic
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
; CHECK-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: call void @llvm.arm.dmb(i32 11)
; CHECK-NEXT: ret float [[TMP6]]
;
  %res = atomicrmw fadd float* %ptr, float %value seq_cst
  ret float %res
}
; A seq_cst atomicrmw fsub on float is expected to be rewritten as a
; compare-exchange retry loop: the current value is loaded once up front, and
; each iteration computes the fsub, bitcasts the pointer and both float values
; to i32, and issues a monotonic i32 cmpxchg, looping until the cmpxchg
; reports success. The expected output has a call to @llvm.arm.dmb(i32 11)
; before the initial load and another in the exit block before the return.
; The assertions below are autogenerated; regenerate them with
; utils/update_test_checks.py rather than editing them by hand.
define float @test_atomicrmw_fsub_f32(float* %ptr, float %value) {
; CHECK-LABEL: @test_atomicrmw_fsub_f32(
; CHECK-NEXT: call void @llvm.arm.dmb(i32 11)
; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[PTR:%.*]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = fsub float [[LOADED]], [[VALUE:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[PTR]] to i32*
; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32
; CHECK-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg i32* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] monotonic monotonic
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
; CHECK-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: call void @llvm.arm.dmb(i32 11)
; CHECK-NEXT: ret float [[TMP6]]
;
  %res = atomicrmw fsub float* %ptr, float %value seq_cst
  ret float %res
}

Some files were not shown because too many files have changed in this diff Show More