1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 12:41:49 +01:00

[X86] Make memcmp vector lowering handle arbitrary expansions

Teach combineVectorSizedSetCCEquality() to handle arbitrary memcmp
expansions but do not change any default policy for now.

This also fixes a bug in the memcmp expansion itself when large
displacements are needed.

https://reviews.llvm.org/D69507
This commit is contained in:
David Zarzycki 2019-10-28 14:39:40 +02:00
parent 975fa8af18
commit 67e76cc404
5 changed files with 6348 additions and 99 deletions

View File

@ -264,9 +264,9 @@ Value *MemCmpExpansion::getPtrToElementAtOffset(Value *Source,
uint64_t OffsetBytes) {
if (OffsetBytes > 0) {
auto *ByteType = Type::getInt8Ty(CI->getContext());
Source = Builder.CreateGEP(
Source = Builder.CreateConstGEP1_64(
ByteType, Builder.CreateBitCast(Source, ByteType->getPointerTo()),
ConstantInt::get(ByteType, OffsetBytes));
OffsetBytes);
}
return Builder.CreateBitCast(Source, LoadSizeType->getPointerTo());
}

View File

@ -42669,6 +42669,44 @@ static SDValue combineZext(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
/// Recursive helper for combineVectorSizedSetCCEquality(): returns true when
/// \p X is a tree whose interior nodes are all ISD::OR and whose leaves are all
/// ISD::XOR, i.e. the shape this file treats as a memcmp expansion.
///
/// \p Root is true only for the outermost call. A root that is not an OR is
/// rejected, so a lone XOR does not count as a tree.
static bool isOrXorXorTree(SDValue X, bool Root = true) {
  const unsigned Opc = X.getOpcode();

  // Not an OR: this node is a leaf. Leaves must be XORs, and the root itself
  // may never be a leaf.
  if (Opc != ISD::OR)
    return !Root && Opc == ISD::XOR;

  // Interior OR node: both subtrees must match (left is checked first and a
  // mismatch there skips the right subtree).
  if (!isOrXorXorTree(X.getOperand(0), false))
    return false;
  return isOrXorXorTree(X.getOperand(1), false);
}
/// Recursive helper for combineVectorSizedSetCCEquality(): rebuilds an OR/XOR
/// tree (as accepted by isOrXorXorTree()) out of vector operations.
///
/// Each XOR leaf has both operands converted with \p SToV and is then emitted
/// as:
///   - a SETNE setcc of type \p CmpVT when \p VecVT != \p CmpVT,
///   - otherwise an ISD::XOR of type \p VecVT when \p HasPT is set,
///   - otherwise a SETEQ setcc of type \p CmpVT.
/// Each OR node combines its two rebuilt children with, respectively, an
/// ISD::OR in \p CmpVT, an ISD::OR in \p VecVT, or an ISD::AND in \p CmpVT
/// (equality results are and-ed, inequality/difference results are or-ed).
///
/// \p X must be an ISD::OR or ISD::XOR node; anything else is unreachable.
template<typename F>
static SDValue emitOrXorXorTree(SDValue X, SDLoc &DL, SelectionDAG &DAG,
EVT VecVT, EVT CmpVT, bool HasPT, F SToV) {
  // Operands are fetched up front, before the opcode is inspected.
  SDValue LHS = X.getOperand(0);
  SDValue RHS = X.getOperand(1);
  const bool TypesDiffer = VecVT != CmpVT;

  switch (X.getOpcode()) {
  case ISD::OR: {
    // Interior node: rebuild the left subtree, then the right one, and merge.
    SDValue Left = emitOrXorXorTree(LHS, DL, DAG, VecVT, CmpVT, HasPT, SToV);
    SDValue Right = emitOrXorXorTree(RHS, DL, DAG, VecVT, CmpVT, HasPT, SToV);
    if (TypesDiffer)
      return DAG.getNode(ISD::OR, DL, CmpVT, Left, Right);
    if (HasPT)
      return DAG.getNode(ISD::OR, DL, VecVT, Left, Right);
    return DAG.getNode(ISD::AND, DL, CmpVT, Left, Right);
  }
  case ISD::XOR: {
    // Leaf: convert both scalar operands, then compare (or xor) the vectors.
    SDValue Left = SToV(LHS);
    SDValue Right = SToV(RHS);
    if (TypesDiffer)
      return DAG.getSetCC(DL, CmpVT, Left, Right, ISD::SETNE);
    if (HasPT)
      return DAG.getNode(ISD::XOR, DL, VecVT, Left, Right);
    return DAG.getSetCC(DL, CmpVT, Left, Right, ISD::SETEQ);
  }
  default:
    break;
  }
  llvm_unreachable("Impossible");
}
/// Try to map a 128-bit or larger integer comparison to vector instructions
/// before type legalization splits it up into chunks.
static SDValue combineVectorSizedSetCCEquality(SDNode *SetCC, SelectionDAG &DAG,
@ -42689,10 +42727,8 @@ static SDValue combineVectorSizedSetCCEquality(SDNode *SetCC, SelectionDAG &DAG,
// logically-combined vector-sized operands compared to zero. This pattern may
// be generated by the memcmp expansion pass with oversized integer compares
// (see PR33325).
bool IsOrXorXorCCZero = isNullConstant(Y) && X.getOpcode() == ISD::OR &&
X.getOperand(0).getOpcode() == ISD::XOR &&
X.getOperand(1).getOpcode() == ISD::XOR;
if (isNullConstant(Y) && !IsOrXorXorCCZero)
bool IsOrXorXorTreeCCZero = isNullConstant(Y) && isOrXorXorTree(X);
if (isNullConstant(Y) && !IsOrXorXorTreeCCZero)
return SDValue();
// Don't perform this combine if constructing the vector will be expensive.
@ -42702,7 +42738,7 @@ static SDValue combineVectorSizedSetCCEquality(SDNode *SetCC, SelectionDAG &DAG,
X.getOpcode() == ISD::LOAD;
};
if ((!IsVectorBitCastCheap(X) || !IsVectorBitCastCheap(Y)) &&
!IsOrXorXorCCZero)
!IsOrXorXorTreeCCZero)
return SDValue();
EVT VT = SetCC->getValueType(0);
@ -42775,28 +42811,12 @@ static SDValue combineVectorSizedSetCCEquality(SDNode *SetCC, SelectionDAG &DAG,
};
SDValue Cmp;
if (IsOrXorXorCCZero) {
if (IsOrXorXorTreeCCZero) {
// This is a bitwise-combined equality comparison of 2 pairs of vectors:
// setcc i128 (or (xor A, B), (xor C, D)), 0, eq|ne
// Use 2 vector equality compares and 'and' the results before doing a
// MOVMSK.
SDValue A = ScalarToVector(X.getOperand(0).getOperand(0));
SDValue B = ScalarToVector(X.getOperand(0).getOperand(1));
SDValue C = ScalarToVector(X.getOperand(1).getOperand(0));
SDValue D = ScalarToVector(X.getOperand(1).getOperand(1));
if (VecVT != CmpVT) {
SDValue Cmp1 = DAG.getSetCC(DL, CmpVT, A, B, ISD::SETNE);
SDValue Cmp2 = DAG.getSetCC(DL, CmpVT, C, D, ISD::SETNE);
Cmp = DAG.getNode(ISD::OR, DL, CmpVT, Cmp1, Cmp2);
} else if (HasPT) {
SDValue Cmp1 = DAG.getNode(ISD::XOR, DL, VecVT, A, B);
SDValue Cmp2 = DAG.getNode(ISD::XOR, DL, VecVT, C, D);
Cmp = DAG.getNode(ISD::OR, DL, VecVT, Cmp1, Cmp2);
} else {
SDValue Cmp1 = DAG.getSetCC(DL, CmpVT, A, B, ISD::SETEQ);
SDValue Cmp2 = DAG.getSetCC(DL, CmpVT, C, D, ISD::SETEQ);
Cmp = DAG.getNode(ISD::AND, DL, CmpVT, Cmp1, Cmp2);
}
Cmp = emitOrXorXorTree(X, DL, DAG, VecVT, CmpVT, HasPT, ScalarToVector);
} else {
SDValue VecX = ScalarToVector(X);
SDValue VecY = ScalarToVector(Y);

View File

@ -19,9 +19,9 @@ entry:
; CHECK-LABEL: loadbb1:{{.*}}
; CHECK: [[BCC1:%[0-9]+]] = bitcast i32* {{.*}} to i8*
; CHECK-NEXT: [[BCC2:%[0-9]+]] = bitcast i32* {{.*}} to i8*
; CHECK-NEXT: [[GEP1:%[0-9]+]] = getelementptr i8, i8* [[BCC2]], i8 8
; CHECK-NEXT: [[GEP1:%[0-9]+]] = getelementptr i8, i8* [[BCC2]], i64 8
; CHECK-NEXT: [[BCL1:%[0-9]+]] = bitcast i8* [[GEP1]] to i64*
; CHECK-NEXT: [[GEP2:%[0-9]+]] = getelementptr i8, i8* [[BCC1]], i8 8
; CHECK-NEXT: [[GEP2:%[0-9]+]] = getelementptr i8, i8* [[BCC1]], i64 8
; CHECK-NEXT: [[BCL2:%[0-9]+]] = bitcast i8* [[GEP2]] to i64*
; CHECK-NEXT: [[LOAD1:%[0-9]+]] = load i64, i64* [[BCL1]]
; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i64, i64* [[BCL2]]
@ -44,9 +44,9 @@ entry:
; CHECK-BE-LABEL: loadbb1:{{.*}}
; CHECK-BE: [[BCC1:%[0-9]+]] = bitcast i32* {{.*}} to i8*
; CHECK-BE-NEXT: [[BCC2:%[0-9]+]] = bitcast i32* {{.*}} to i8*
; CHECK-BE-NEXT: [[GEP1:%[0-9]+]] = getelementptr i8, i8* [[BCC2]], i8 8
; CHECK-BE-NEXT: [[GEP1:%[0-9]+]] = getelementptr i8, i8* [[BCC2]], i64 8
; CHECK-BE-NEXT: [[BCL1:%[0-9]+]] = bitcast i8* [[GEP1]] to i64*
; CHECK-BE-NEXT: [[GEP2:%[0-9]+]] = getelementptr i8, i8* [[BCC1]], i8 8
; CHECK-BE-NEXT: [[GEP2:%[0-9]+]] = getelementptr i8, i8* [[BCC1]], i64 8
; CHECK-BE-NEXT: [[BCL2:%[0-9]+]] = bitcast i8* [[GEP2]] to i64*
; CHECK-BE-NEXT: [[LOAD1:%[0-9]+]] = load i64, i64* [[BCL1]]
; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i64, i64* [[BCL2]]

File diff suppressed because it is too large Load Diff

View File

@ -41,8 +41,8 @@ define i32 @cmp3(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; ALL-NEXT: [[TMP9:%.*]] = icmp eq i16 [[TMP7]], [[TMP8]]
; ALL-NEXT: br i1 [[TMP9]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
; ALL: loadbb1:
; ALL-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[X]], i8 2
; ALL-NEXT: [[TMP11:%.*]] = getelementptr i8, i8* [[Y]], i8 2
; ALL-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[X]], i64 2
; ALL-NEXT: [[TMP11:%.*]] = getelementptr i8, i8* [[Y]], i64 2
; ALL-NEXT: [[TMP12:%.*]] = load i8, i8* [[TMP10]]
; ALL-NEXT: [[TMP13:%.*]] = load i8, i8* [[TMP11]]
; ALL-NEXT: [[TMP14:%.*]] = zext i8 [[TMP12]] to i32
@ -95,8 +95,8 @@ define i32 @cmp5(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; ALL-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP7]], [[TMP8]]
; ALL-NEXT: br i1 [[TMP9]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
; ALL: loadbb1:
; ALL-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[X]], i8 4
; ALL-NEXT: [[TMP11:%.*]] = getelementptr i8, i8* [[Y]], i8 4
; ALL-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[X]], i64 4
; ALL-NEXT: [[TMP11:%.*]] = getelementptr i8, i8* [[Y]], i64 4
; ALL-NEXT: [[TMP12:%.*]] = load i8, i8* [[TMP10]]
; ALL-NEXT: [[TMP13:%.*]] = load i8, i8* [[TMP11]]
; ALL-NEXT: [[TMP14:%.*]] = zext i8 [[TMP12]] to i32
@ -130,9 +130,9 @@ define i32 @cmp6(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; ALL-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP7]], [[TMP8]]
; ALL-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
; ALL: loadbb1:
; ALL-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[X]], i8 4
; ALL-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[X]], i64 4
; ALL-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i16*
; ALL-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[Y]], i8 4
; ALL-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[Y]], i64 4
; ALL-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP12]] to i16*
; ALL-NEXT: [[TMP14:%.*]] = load i16, i16* [[TMP11]]
; ALL-NEXT: [[TMP15:%.*]] = load i16, i16* [[TMP13]]
@ -178,9 +178,9 @@ define i32 @cmp8(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X32-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP7]], [[TMP8]]
; X32-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
; X32: loadbb1:
; X32-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[X]], i8 4
; X32-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[X]], i64 4
; X32-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*
; X32-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[Y]], i8 4
; X32-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[Y]], i64 4
; X32-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP12]] to i32*
; X32-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP11]]
; X32-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP13]]
@ -233,8 +233,8 @@ define i32 @cmp9(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]]
; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
; X64: loadbb1:
; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[X]], i8 8
; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, i8* [[Y]], i8 8
; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[X]], i64 8
; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, i8* [[Y]], i64 8
; X64-NEXT: [[TMP12:%.*]] = load i8, i8* [[TMP10]]
; X64-NEXT: [[TMP13:%.*]] = load i8, i8* [[TMP11]]
; X64-NEXT: [[TMP14:%.*]] = zext i8 [[TMP12]] to i32
@ -272,9 +272,9 @@ define i32 @cmp10(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]]
; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
; X64: loadbb1:
; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[X]], i8 8
; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[X]], i64 8
; X64-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i16*
; X64-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[Y]], i8 8
; X64-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[Y]], i64 8
; X64-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP12]] to i16*
; X64-NEXT: [[TMP14:%.*]] = load i16, i16* [[TMP11]]
; X64-NEXT: [[TMP15:%.*]] = load i16, i16* [[TMP13]]
@ -324,9 +324,9 @@ define i32 @cmp12(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]]
; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
; X64: loadbb1:
; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[X]], i8 8
; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[X]], i64 8
; X64-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*
; X64-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[Y]], i8 8
; X64-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[Y]], i64 8
; X64-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP12]] to i32*
; X64-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP11]]
; X64-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP13]]
@ -394,9 +394,9 @@ define i32 @cmp16(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]]
; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
; X64: loadbb1:
; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[X]], i8 8
; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[X]], i64 8
; X64-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i64*
; X64-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[Y]], i8 8
; X64-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[Y]], i64 8
; X64-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP12]] to i64*
; X64-NEXT: [[TMP14:%.*]] = load i64, i64* [[TMP11]]
; X64-NEXT: [[TMP15:%.*]] = load i64, i64* [[TMP13]]
@ -437,8 +437,8 @@ define i32 @cmp_eq3(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X32-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]]
; X32-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP2]]
; X32-NEXT: [[TMP5:%.*]] = xor i16 [[TMP3]], [[TMP4]]
; X32-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 2
; X32-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i8 2
; X32-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 2
; X32-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 2
; X32-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]]
; X32-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]]
; X32-NEXT: [[TMP10:%.*]] = zext i8 [[TMP8]] to i16
@ -463,8 +463,8 @@ define i32 @cmp_eq3(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i16 [[TMP3]], [[TMP4]]
; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
; X64_1LD: loadbb1:
; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 2
; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i8 2
; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 2
; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 2
; X64_1LD-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]]
; X64_1LD-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]]
; X64_1LD-NEXT: [[TMP10:%.*]] = icmp ne i8 [[TMP8]], [[TMP9]]
@ -481,8 +481,8 @@ define i32 @cmp_eq3(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_2LD-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]]
; X64_2LD-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP2]]
; X64_2LD-NEXT: [[TMP5:%.*]] = xor i16 [[TMP3]], [[TMP4]]
; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 2
; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i8 2
; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 2
; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 2
; X64_2LD-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]]
; X64_2LD-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]]
; X64_2LD-NEXT: [[TMP10:%.*]] = zext i8 [[TMP8]] to i16
@ -526,8 +526,8 @@ define i32 @cmp_eq5(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
; X32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
; X32-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
; X32-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 4
; X32-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i8 4
; X32-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 4
; X32-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 4
; X32-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]]
; X32-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]]
; X32-NEXT: [[TMP10:%.*]] = zext i8 [[TMP8]] to i32
@ -552,8 +552,8 @@ define i32 @cmp_eq5(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]]
; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
; X64_1LD: loadbb1:
; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 4
; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i8 4
; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 4
; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 4
; X64_1LD-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]]
; X64_1LD-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]]
; X64_1LD-NEXT: [[TMP10:%.*]] = icmp ne i8 [[TMP8]], [[TMP9]]
@ -570,8 +570,8 @@ define i32 @cmp_eq5(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_2LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
; X64_2LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
; X64_2LD-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 4
; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i8 4
; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 4
; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 4
; X64_2LD-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]]
; X64_2LD-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]]
; X64_2LD-NEXT: [[TMP10:%.*]] = zext i8 [[TMP8]] to i32
@ -597,9 +597,9 @@ define i32 @cmp_eq6(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
; X32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
; X32-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
; X32-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 4
; X32-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 4
; X32-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i16*
; X32-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i8 4
; X32-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i64 4
; X32-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i16*
; X32-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP7]]
; X32-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]]
@ -625,9 +625,9 @@ define i32 @cmp_eq6(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]]
; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
; X64_1LD: loadbb1:
; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 4
; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 4
; X64_1LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i16*
; X64_1LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i8 4
; X64_1LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i64 4
; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i16*
; X64_1LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP7]]
; X64_1LD-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]]
@ -645,9 +645,9 @@ define i32 @cmp_eq6(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_2LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
; X64_2LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
; X64_2LD-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 4
; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 4
; X64_2LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i16*
; X64_2LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i8 4
; X64_2LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i64 4
; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i16*
; X64_2LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP7]]
; X64_2LD-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]]
@ -674,9 +674,9 @@ define i32 @cmp_eq7(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
; X32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
; X32-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
; X32-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 3
; X32-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 3
; X32-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i32*
; X32-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i8 3
; X32-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i64 3
; X32-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i32*
; X32-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP7]]
; X32-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]]
@ -700,9 +700,9 @@ define i32 @cmp_eq7(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]]
; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
; X64_1LD: loadbb1:
; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 3
; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 3
; X64_1LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i32*
; X64_1LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i8 3
; X64_1LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i64 3
; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i32*
; X64_1LD-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP7]]
; X64_1LD-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]]
@ -720,9 +720,9 @@ define i32 @cmp_eq7(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_2LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
; X64_2LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
; X64_2LD-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 3
; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 3
; X64_2LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i32*
; X64_2LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i8 3
; X64_2LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i64 3
; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i32*
; X64_2LD-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP7]]
; X64_2LD-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]]
@ -747,9 +747,9 @@ define i32 @cmp_eq8(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
; X32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
; X32-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
; X32-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 4
; X32-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 4
; X32-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i32*
; X32-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i8 4
; X32-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i64 4
; X32-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i32*
; X32-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP7]]
; X32-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]]
@ -797,8 +797,8 @@ define i32 @cmp_eq9(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
; X64_1LD: loadbb1:
; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 8
; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i8 8
; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 8
; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 8
; X64_1LD-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]]
; X64_1LD-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]]
; X64_1LD-NEXT: [[TMP10:%.*]] = icmp ne i8 [[TMP8]], [[TMP9]]
@ -815,8 +815,8 @@ define i32 @cmp_eq9(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 8
; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i8 8
; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 8
; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 8
; X64_2LD-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]]
; X64_2LD-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]]
; X64_2LD-NEXT: [[TMP10:%.*]] = zext i8 [[TMP8]] to i64
@ -854,9 +854,9 @@ define i32 @cmp_eq10(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
; X64_1LD: loadbb1:
; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 8
; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 8
; X64_1LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i16*
; X64_1LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i8 8
; X64_1LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i64 8
; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i16*
; X64_1LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP7]]
; X64_1LD-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]]
@ -874,9 +874,9 @@ define i32 @cmp_eq10(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 8
; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 8
; X64_2LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i16*
; X64_2LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i8 8
; X64_2LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i64 8
; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i16*
; X64_2LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP7]]
; X64_2LD-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]]
@ -915,9 +915,9 @@ define i32 @cmp_eq11(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
; X64_1LD: loadbb1:
; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 3
; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 3
; X64_1LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i64*
; X64_1LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i8 3
; X64_1LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i64 3
; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i64*
; X64_1LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP7]]
; X64_1LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]]
@ -935,9 +935,9 @@ define i32 @cmp_eq11(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 3
; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 3
; X64_2LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i64*
; X64_2LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i8 3
; X64_2LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i64 3
; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i64*
; X64_2LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP7]]
; X64_2LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]]
@ -974,9 +974,9 @@ define i32 @cmp_eq12(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
; X64_1LD: loadbb1:
; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 8
; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 8
; X64_1LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i32*
; X64_1LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i8 8
; X64_1LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i64 8
; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i32*
; X64_1LD-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP7]]
; X64_1LD-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]]
@ -994,9 +994,9 @@ define i32 @cmp_eq12(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 8
; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 8
; X64_2LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i32*
; X64_2LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i8 8
; X64_2LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i64 8
; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i32*
; X64_2LD-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP7]]
; X64_2LD-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]]
@ -1035,9 +1035,9 @@ define i32 @cmp_eq13(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
; X64_1LD: loadbb1:
; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 5
; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 5
; X64_1LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i64*
; X64_1LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i8 5
; X64_1LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i64 5
; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i64*
; X64_1LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP7]]
; X64_1LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]]
@ -1055,9 +1055,9 @@ define i32 @cmp_eq13(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 5
; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 5
; X64_2LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i64*
; X64_2LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i8 5
; X64_2LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i64 5
; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i64*
; X64_2LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP7]]
; X64_2LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]]
@ -1094,9 +1094,9 @@ define i32 @cmp_eq14(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
; X64_1LD: loadbb1:
; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 6
; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 6
; X64_1LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i64*
; X64_1LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i8 6
; X64_1LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i64 6
; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i64*
; X64_1LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP7]]
; X64_1LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]]
@ -1114,9 +1114,9 @@ define i32 @cmp_eq14(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 6
; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 6
; X64_2LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i64*
; X64_2LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i8 6
; X64_2LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i64 6
; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i64*
; X64_2LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP7]]
; X64_2LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]]
@ -1153,9 +1153,9 @@ define i32 @cmp_eq15(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
; X64_1LD: loadbb1:
; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 7
; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 7
; X64_1LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i64*
; X64_1LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i8 7
; X64_1LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i64 7
; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i64*
; X64_1LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP7]]
; X64_1LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]]
@ -1173,9 +1173,9 @@ define i32 @cmp_eq15(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 7
; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 7
; X64_2LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i64*
; X64_2LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i8 7
; X64_2LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i64 7
; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i64*
; X64_2LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP7]]
; X64_2LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]]