mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-19 11:02:59 +02:00
[x86] enable CGP memcmp() expansion for 2/4/8 byte sizes
There are a couple of potential improvements, as seen in the IR and asm: (1) we're unnecessarily extending to a larger type to compare values; (2) the codegen for (select cond, 1, -1) could avoid a cmov (or we could change the order of the compares, so that we have a select with a 0 operand). llvm-svn: 305802
This commit is contained in:
parent
df21643743
commit
f4fed2c4b8
@ -1662,6 +1662,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
|
||||
MaxStoresPerMemcpyOptSize = 4;
|
||||
MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
|
||||
MaxStoresPerMemmoveOptSize = 4;
|
||||
|
||||
// TODO: These control memcmp expansion in CGP and are set low to prevent
|
||||
// altering the vector expansion for 16/32 byte memcmp in SelectionDAGBuilder.
|
||||
MaxLoadsPerMemcmp = 1;
|
||||
MaxLoadsPerMemcmpOptSize = 1;
|
||||
|
||||
// Set loop alignment to 2^ExperimentalPrefLoopAlignment bytes (default: 2^4).
|
||||
setPrefLoopAlignment(ExperimentalPrefLoopAlignment);
|
||||
|
||||
|
@ -2232,6 +2232,12 @@ bool X86TTIImpl::areInlineCompatible(const Function *Caller,
|
||||
return (CallerBits & CalleeBits) == CalleeBits;
|
||||
}
|
||||
|
||||
// Target hook for memcmp() expansion (per the commit title, the expansion
// itself is performed in CGP). Always returns true and reports the largest
// load size to use through the MaxLoadSize out-parameter: 8 on 64-bit
// subtargets, 4 otherwise. The value is presumably in bytes -- the
// accompanying tests expand an 8-byte memcmp with i64 loads on X64 only,
// while X32 keeps the libcall. The instruction argument I is not consulted.
bool X86TTIImpl::expandMemCmp(Instruction *I, unsigned &MaxLoadSize) {
|
||||
// TODO: We can increase these based on available vector ops.
|
||||
MaxLoadSize = ST->is64Bit() ? 8 : 4;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool X86TTIImpl::enableInterleavedAccessVectorization() {
|
||||
// TODO: We expect this to be beneficial regardless of arch,
|
||||
// but there are currently some unexplained performance artifacts on Atom.
|
||||
|
@ -107,7 +107,7 @@ public:
|
||||
bool isLegalMaskedScatter(Type *DataType);
|
||||
bool areInlineCompatible(const Function *Caller,
|
||||
const Function *Callee) const;
|
||||
|
||||
bool expandMemCmp(Instruction *I, unsigned &MaxLoadSize);
|
||||
bool enableInterleavedAccessVectorization();
|
||||
private:
|
||||
int getGSScalarCost(unsigned Opcode, Type *DataTy, bool VariableMask,
|
||||
|
@ -12,19 +12,46 @@ declare i32 @memcmp(i8*, i8*, i64)
|
||||
|
||||
define i32 @length2(i8* %X, i8* %Y) nounwind {
|
||||
; X32-LABEL: length2:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: pushl $0
|
||||
; X32-NEXT: pushl $2
|
||||
; X32-NEXT: pushl {{[0-9]+}}(%esp)
|
||||
; X32-NEXT: pushl {{[0-9]+}}(%esp)
|
||||
; X32-NEXT: calll memcmp
|
||||
; X32-NEXT: addl $16, %esp
|
||||
; X32: # BB#0: # %loadbb
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X32-NEXT: movzwl (%ecx), %ecx
|
||||
; X32-NEXT: movzwl (%eax), %eax
|
||||
; X32-NEXT: rolw $8, %cx
|
||||
; X32-NEXT: rolw $8, %ax
|
||||
; X32-NEXT: movzwl %cx, %ecx
|
||||
; X32-NEXT: movzwl %ax, %eax
|
||||
; X32-NEXT: cmpl %eax, %ecx
|
||||
; X32-NEXT: je .LBB0_1
|
||||
; X32-NEXT: # BB#2: # %res_block
|
||||
; X32-NEXT: movl $-1, %eax
|
||||
; X32-NEXT: jb .LBB0_4
|
||||
; X32-NEXT: # BB#3: # %res_block
|
||||
; X32-NEXT: movl $1, %eax
|
||||
; X32-NEXT: .LBB0_4: # %endblock
|
||||
; X32-NEXT: retl
|
||||
; X32-NEXT: .LBB0_1:
|
||||
; X32-NEXT: xorl %eax, %eax
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: length2:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: movl $2, %edx
|
||||
; X64-NEXT: jmp memcmp # TAILCALL
|
||||
; X64: # BB#0: # %loadbb
|
||||
; X64-NEXT: movzwl (%rdi), %eax
|
||||
; X64-NEXT: movzwl (%rsi), %ecx
|
||||
; X64-NEXT: rolw $8, %ax
|
||||
; X64-NEXT: rolw $8, %cx
|
||||
; X64-NEXT: movzwl %ax, %eax
|
||||
; X64-NEXT: movzwl %cx, %ecx
|
||||
; X64-NEXT: cmpq %rcx, %rax
|
||||
; X64-NEXT: je .LBB0_1
|
||||
; X64-NEXT: # BB#2: # %res_block
|
||||
; X64-NEXT: movl $-1, %ecx
|
||||
; X64-NEXT: movl $1, %eax
|
||||
; X64-NEXT: cmovbl %ecx, %eax
|
||||
; X64-NEXT: retq
|
||||
; X64-NEXT: .LBB0_1:
|
||||
; X64-NEXT: xorl %eax, %eax
|
||||
; X64-NEXT: retq
|
||||
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 2) nounwind
|
||||
ret i32 %m
|
||||
}
|
||||
@ -145,19 +172,42 @@ define i1 @length3_eq(i8* %X, i8* %Y) nounwind {
|
||||
|
||||
define i32 @length4(i8* %X, i8* %Y) nounwind {
|
||||
; X32-LABEL: length4:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: pushl $0
|
||||
; X32-NEXT: pushl $4
|
||||
; X32-NEXT: pushl {{[0-9]+}}(%esp)
|
||||
; X32-NEXT: pushl {{[0-9]+}}(%esp)
|
||||
; X32-NEXT: calll memcmp
|
||||
; X32-NEXT: addl $16, %esp
|
||||
; X32: # BB#0: # %loadbb
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X32-NEXT: movl (%ecx), %ecx
|
||||
; X32-NEXT: movl (%eax), %eax
|
||||
; X32-NEXT: bswapl %ecx
|
||||
; X32-NEXT: bswapl %eax
|
||||
; X32-NEXT: cmpl %eax, %ecx
|
||||
; X32-NEXT: je .LBB6_1
|
||||
; X32-NEXT: # BB#2: # %res_block
|
||||
; X32-NEXT: movl $-1, %eax
|
||||
; X32-NEXT: jb .LBB6_4
|
||||
; X32-NEXT: # BB#3: # %res_block
|
||||
; X32-NEXT: movl $1, %eax
|
||||
; X32-NEXT: .LBB6_4: # %endblock
|
||||
; X32-NEXT: retl
|
||||
; X32-NEXT: .LBB6_1:
|
||||
; X32-NEXT: xorl %eax, %eax
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: length4:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: movl $4, %edx
|
||||
; X64-NEXT: jmp memcmp # TAILCALL
|
||||
; X64: # BB#0: # %loadbb
|
||||
; X64-NEXT: movl (%rdi), %eax
|
||||
; X64-NEXT: movl (%rsi), %ecx
|
||||
; X64-NEXT: bswapl %eax
|
||||
; X64-NEXT: bswapl %ecx
|
||||
; X64-NEXT: cmpq %rcx, %rax
|
||||
; X64-NEXT: je .LBB6_1
|
||||
; X64-NEXT: # BB#2: # %res_block
|
||||
; X64-NEXT: movl $-1, %ecx
|
||||
; X64-NEXT: movl $1, %eax
|
||||
; X64-NEXT: cmovbl %ecx, %eax
|
||||
; X64-NEXT: retq
|
||||
; X64-NEXT: .LBB6_1:
|
||||
; X64-NEXT: xorl %eax, %eax
|
||||
; X64-NEXT: retq
|
||||
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 4) nounwind
|
||||
ret i32 %m
|
||||
}
|
||||
@ -259,9 +309,21 @@ define i32 @length8(i8* %X, i8* %Y) nounwind {
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: length8:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: movl $8, %edx
|
||||
; X64-NEXT: jmp memcmp # TAILCALL
|
||||
; X64: # BB#0: # %loadbb
|
||||
; X64-NEXT: movq (%rdi), %rax
|
||||
; X64-NEXT: movq (%rsi), %rcx
|
||||
; X64-NEXT: bswapq %rax
|
||||
; X64-NEXT: bswapq %rcx
|
||||
; X64-NEXT: cmpq %rcx, %rax
|
||||
; X64-NEXT: je .LBB11_1
|
||||
; X64-NEXT: # BB#2: # %res_block
|
||||
; X64-NEXT: movl $-1, %ecx
|
||||
; X64-NEXT: movl $1, %eax
|
||||
; X64-NEXT: cmovbl %ecx, %eax
|
||||
; X64-NEXT: retq
|
||||
; X64-NEXT: .LBB11_1:
|
||||
; X64-NEXT: xorl %eax, %eax
|
||||
; X64-NEXT: retq
|
||||
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 8) nounwind
|
||||
ret i32 %m
|
||||
}
|
||||
|
@ -6,9 +6,47 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
|
||||
declare i32 @memcmp(i8* nocapture, i8* nocapture, i64)
|
||||
|
||||
define i32 @cmp2(i8* nocapture readonly %x, i8* nocapture readonly %y) {
|
||||
; ALL-LABEL: @cmp2(
|
||||
; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 2)
|
||||
; ALL-NEXT: ret i32 [[CALL]]
|
||||
; X32-LABEL: @cmp2(
|
||||
; X32-NEXT: loadbb:
|
||||
; X32-NEXT: [[TMP0:%.*]] = bitcast i8* %x to i16*
|
||||
; X32-NEXT: [[TMP1:%.*]] = bitcast i8* %y to i16*
|
||||
; X32-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP0]]
|
||||
; X32-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]]
|
||||
; X32-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
|
||||
; X32-NEXT: [[TMP5:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
|
||||
; X32-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
|
||||
; X32-NEXT: [[TMP7:%.*]] = zext i16 [[TMP5]] to i32
|
||||
; X32-NEXT: [[TMP8:%.*]] = sub i32 [[TMP6]], [[TMP7]]
|
||||
; X32-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
|
||||
; X32-NEXT: br i1 [[TMP9]], label %res_block, label %endblock
|
||||
; X32: res_block:
|
||||
; X32-NEXT: [[TMP10:%.*]] = icmp ult i32 [[TMP6]], [[TMP7]]
|
||||
; X32-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 -1, i32 1
|
||||
; X32-NEXT: br label %endblock
|
||||
; X32: endblock:
|
||||
; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, %loadbb ], [ [[TMP11]], %res_block ]
|
||||
; X32-NEXT: ret i32 [[PHI_RES]]
|
||||
;
|
||||
; X64-LABEL: @cmp2(
|
||||
; X64-NEXT: loadbb:
|
||||
; X64-NEXT: [[TMP0:%.*]] = bitcast i8* %x to i16*
|
||||
; X64-NEXT: [[TMP1:%.*]] = bitcast i8* %y to i16*
|
||||
; X64-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP0]]
|
||||
; X64-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]]
|
||||
; X64-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
|
||||
; X64-NEXT: [[TMP5:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
|
||||
; X64-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i64
|
||||
; X64-NEXT: [[TMP7:%.*]] = zext i16 [[TMP5]] to i64
|
||||
; X64-NEXT: [[TMP8:%.*]] = sub i64 [[TMP6]], [[TMP7]]
|
||||
; X64-NEXT: [[TMP9:%.*]] = icmp ne i64 [[TMP8]], 0
|
||||
; X64-NEXT: br i1 [[TMP9]], label %res_block, label %endblock
|
||||
; X64: res_block:
|
||||
; X64-NEXT: [[TMP10:%.*]] = icmp ult i64 [[TMP6]], [[TMP7]]
|
||||
; X64-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 -1, i32 1
|
||||
; X64-NEXT: br label %endblock
|
||||
; X64: endblock:
|
||||
; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, %loadbb ], [ [[TMP11]], %res_block ]
|
||||
; X64-NEXT: ret i32 [[PHI_RES]]
|
||||
;
|
||||
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 2)
|
||||
ret i32 %call
|
||||
@ -24,9 +62,45 @@ define i32 @cmp3(i8* nocapture readonly %x, i8* nocapture readonly %y) {
|
||||
}
|
||||
|
||||
define i32 @cmp4(i8* nocapture readonly %x, i8* nocapture readonly %y) {
|
||||
; ALL-LABEL: @cmp4(
|
||||
; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 4)
|
||||
; ALL-NEXT: ret i32 [[CALL]]
|
||||
; X32-LABEL: @cmp4(
|
||||
; X32-NEXT: loadbb:
|
||||
; X32-NEXT: [[TMP0:%.*]] = bitcast i8* %x to i32*
|
||||
; X32-NEXT: [[TMP1:%.*]] = bitcast i8* %y to i32*
|
||||
; X32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]]
|
||||
; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
|
||||
; X32-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
|
||||
; X32-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
|
||||
; X32-NEXT: [[TMP6:%.*]] = sub i32 [[TMP4]], [[TMP5]]
|
||||
; X32-NEXT: [[TMP7:%.*]] = icmp ne i32 [[TMP6]], 0
|
||||
; X32-NEXT: br i1 [[TMP7]], label %res_block, label %endblock
|
||||
; X32: res_block:
|
||||
; X32-NEXT: [[TMP8:%.*]] = icmp ult i32 [[TMP4]], [[TMP5]]
|
||||
; X32-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 -1, i32 1
|
||||
; X32-NEXT: br label %endblock
|
||||
; X32: endblock:
|
||||
; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, %loadbb ], [ [[TMP9]], %res_block ]
|
||||
; X32-NEXT: ret i32 [[PHI_RES]]
|
||||
;
|
||||
; X64-LABEL: @cmp4(
|
||||
; X64-NEXT: loadbb:
|
||||
; X64-NEXT: [[TMP0:%.*]] = bitcast i8* %x to i32*
|
||||
; X64-NEXT: [[TMP1:%.*]] = bitcast i8* %y to i32*
|
||||
; X64-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]]
|
||||
; X64-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
|
||||
; X64-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
|
||||
; X64-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
|
||||
; X64-NEXT: [[TMP6:%.*]] = zext i32 [[TMP4]] to i64
|
||||
; X64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP5]] to i64
|
||||
; X64-NEXT: [[TMP8:%.*]] = sub i64 [[TMP6]], [[TMP7]]
|
||||
; X64-NEXT: [[TMP9:%.*]] = icmp ne i64 [[TMP8]], 0
|
||||
; X64-NEXT: br i1 [[TMP9]], label %res_block, label %endblock
|
||||
; X64: res_block:
|
||||
; X64-NEXT: [[TMP10:%.*]] = icmp ult i64 [[TMP6]], [[TMP7]]
|
||||
; X64-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 -1, i32 1
|
||||
; X64-NEXT: br label %endblock
|
||||
; X64: endblock:
|
||||
; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, %loadbb ], [ [[TMP11]], %res_block ]
|
||||
; X64-NEXT: ret i32 [[PHI_RES]]
|
||||
;
|
||||
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 4)
|
||||
ret i32 %call
|
||||
@ -60,9 +134,28 @@ define i32 @cmp7(i8* nocapture readonly %x, i8* nocapture readonly %y) {
|
||||
}
|
||||
|
||||
define i32 @cmp8(i8* nocapture readonly %x, i8* nocapture readonly %y) {
|
||||
; ALL-LABEL: @cmp8(
|
||||
; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 8)
|
||||
; ALL-NEXT: ret i32 [[CALL]]
|
||||
; X32-LABEL: @cmp8(
|
||||
; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 8)
|
||||
; X32-NEXT: ret i32 [[CALL]]
|
||||
;
|
||||
; X64-LABEL: @cmp8(
|
||||
; X64-NEXT: loadbb:
|
||||
; X64-NEXT: [[TMP0:%.*]] = bitcast i8* %x to i64*
|
||||
; X64-NEXT: [[TMP1:%.*]] = bitcast i8* %y to i64*
|
||||
; X64-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP0]]
|
||||
; X64-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
|
||||
; X64-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
|
||||
; X64-NEXT: [[TMP5:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
|
||||
; X64-NEXT: [[TMP6:%.*]] = sub i64 [[TMP4]], [[TMP5]]
|
||||
; X64-NEXT: [[TMP7:%.*]] = icmp ne i64 [[TMP6]], 0
|
||||
; X64-NEXT: br i1 [[TMP7]], label %res_block, label %endblock
|
||||
; X64: res_block:
|
||||
; X64-NEXT: [[TMP8:%.*]] = icmp ult i64 [[TMP4]], [[TMP5]]
|
||||
; X64-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 -1, i32 1
|
||||
; X64-NEXT: br label %endblock
|
||||
; X64: endblock:
|
||||
; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, %loadbb ], [ [[TMP9]], %res_block ]
|
||||
; X64-NEXT: ret i32 [[PHI_RES]]
|
||||
;
|
||||
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 8)
|
||||
ret i32 %call
|
||||
@ -142,8 +235,13 @@ define i32 @cmp16(i8* nocapture readonly %x, i8* nocapture readonly %y) {
|
||||
|
||||
define i32 @cmp_eq2(i8* nocapture readonly %x, i8* nocapture readonly %y) {
|
||||
; ALL-LABEL: @cmp_eq2(
|
||||
; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 2)
|
||||
; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
|
||||
; ALL-NEXT: [[TMP1:%.*]] = bitcast i8* %x to i16*
|
||||
; ALL-NEXT: [[TMP2:%.*]] = bitcast i8* %y to i16*
|
||||
; ALL-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]]
|
||||
; ALL-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP2]]
|
||||
; ALL-NEXT: [[TMP5:%.*]] = icmp ne i16 [[TMP3]], [[TMP4]]
|
||||
; ALL-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32
|
||||
; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP6]], 0
|
||||
; ALL-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
|
||||
; ALL-NEXT: ret i32 [[CONV]]
|
||||
;
|
||||
@ -168,8 +266,13 @@ define i32 @cmp_eq3(i8* nocapture readonly %x, i8* nocapture readonly %y) {
|
||||
|
||||
define i32 @cmp_eq4(i8* nocapture readonly %x, i8* nocapture readonly %y) {
|
||||
; ALL-LABEL: @cmp_eq4(
|
||||
; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 4)
|
||||
; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
|
||||
; ALL-NEXT: [[TMP1:%.*]] = bitcast i8* %x to i32*
|
||||
; ALL-NEXT: [[TMP2:%.*]] = bitcast i8* %y to i32*
|
||||
; ALL-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
|
||||
; ALL-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
|
||||
; ALL-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]]
|
||||
; ALL-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32
|
||||
; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP6]], 0
|
||||
; ALL-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
|
||||
; ALL-NEXT: ret i32 [[CONV]]
|
||||
;
|
||||
@ -219,11 +322,22 @@ define i32 @cmp_eq7(i8* nocapture readonly %x, i8* nocapture readonly %y) {
|
||||
}
|
||||
|
||||
define i32 @cmp_eq8(i8* nocapture readonly %x, i8* nocapture readonly %y) {
|
||||
; ALL-LABEL: @cmp_eq8(
|
||||
; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 8)
|
||||
; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
|
||||
; ALL-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
|
||||
; ALL-NEXT: ret i32 [[CONV]]
|
||||
; X32-LABEL: @cmp_eq8(
|
||||
; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 8)
|
||||
; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
|
||||
; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
|
||||
; X32-NEXT: ret i32 [[CONV]]
|
||||
;
|
||||
; X64-LABEL: @cmp_eq8(
|
||||
; X64-NEXT: [[TMP1:%.*]] = bitcast i8* %x to i64*
|
||||
; X64-NEXT: [[TMP2:%.*]] = bitcast i8* %y to i64*
|
||||
; X64-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
|
||||
; X64-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
|
||||
; X64-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
|
||||
; X64-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32
|
||||
; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP6]], 0
|
||||
; X64-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
|
||||
; X64-NEXT: ret i32 [[CONV]]
|
||||
;
|
||||
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 8)
|
||||
%cmp = icmp eq i32 %call, 0
|
||||
|
Loading…
Reference in New Issue
Block a user