
[x86, MemCmpExpansion] allow 2 pairs of loads per block (PR33325)

This is the last step needed to fix PR33325:
https://bugs.llvm.org/show_bug.cgi?id=33325

We're trading branches and compares for loads and logic ops.
This makes the code smaller and hopefully faster in most cases.
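
Roughly, the old vs. new shape for a 16-byte equality test, written out as a
hand-written C++ sketch (illustration only, not the pass's literal output;
the function names are made up):

#include <cstdint>
#include <cstring>

// Old shape: one load pair per block, early-exit branch in between.
bool eq16_branchy(const char *x, const char *y) {
  uint64_t a, b;
  std::memcpy(&a, x, 8);
  std::memcpy(&b, y, 8);
  if (a != b)                          // cmp + jne
    return false;
  std::memcpy(&a, x + 8, 8);
  std::memcpy(&b, y + 8, 8);
  return a == b;
}

// New shape: two load pairs in one block, folded with xor/or,
// one compare at the end and no branches.
bool eq16_branchless(const char *x, const char *y) {
  uint64_t a0, b0, a1, b1;
  std::memcpy(&a0, x, 8);
  std::memcpy(&b0, y, 8);
  std::memcpy(&a1, x + 8, 8);
  std::memcpy(&b1, y + 8, 8);
  return ((a0 ^ b0) | (a1 ^ b1)) == 0;
}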

The 24-byte test shows an interesting construct: we load the trailing scalar 
elements into vector registers and generate the same pcmpeq+movmsk code that 
we expected for a pair of full vector elements (see the 32- and 64-byte tests).
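
In SSE2 intrinsics that construct looks roughly like the sketch below (the
pass emits IR that the backend lowers to movq/pcmpeqb/pand/pmovmskb; the
intrinsics and the eq24 name are only for illustration):

#include <emmintrin.h>  // SSE2
#include <cstdint>

bool eq24(const char *x, const char *y) {
  // Full 16-byte vector loads for bytes [0,16).
  __m128i x0 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(x));
  __m128i y0 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(y));
  // Trailing 8 scalar bytes [16,24) loaded into the low vector half (movq).
  __m128i x1 = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(x + 16));
  __m128i y1 = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(y + 16));
  __m128i c0 = _mm_cmpeq_epi8(x0, y0);      // pcmpeqb
  __m128i c1 = _mm_cmpeq_epi8(x1, y1);      // pcmpeqb
  __m128i c  = _mm_and_si128(c0, c1);       // pand
  return _mm_movemask_epi8(c) == 0xFFFF;    // pmovmskb + cmp
}

The movq loads zero the upper eight lanes on both sides, so those lanes
compare equal and the pand leaves the full 16-byte compare intact.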

Differential Revision: https://reviews.llvm.org/D41714

llvm-svn: 321934
Author: Sanjay Patel, 2018-01-06 16:16:04 +00:00
commit eaf67121dd (parent 3200a63add)
5 changed files with 511 additions and 589 deletions


@ -564,12 +564,8 @@ Value *MemCmpExpansion::getMemCmpOneBlock() {
// This function expands the memcmp call into an inline expansion and returns
// the memcmp result.
Value *MemCmpExpansion::getMemCmpExpansion() {
// A memcmp with zero-comparison with only one block of load and compare does
// not need to set up any extra blocks. This case could be handled in the DAG,
// but since we have all of the machinery to flexibly expand any memcmp here,
// we choose to handle this case too to avoid fragmented lowering.
if ((!IsUsedForZeroCmp && NumLoadsPerBlockForZeroCmp != 1) ||
getNumBlocks() != 1) {
// Create the basic block framework for a multi-block expansion.
if (getNumBlocks() != 1) {
BasicBlock *StartBlock = CI->getParent();
EndBlock = StartBlock->splitBasicBlock(CI, "endblock");
setupEndBlockPHINodes();
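
For equality-only (zero-comparison) expansions, the result is computed
branchlessly: each load pair is xor'ed, the xors are or'ed together, and a
single compare at the end produces the 0/1 result (visible in the X64_2LD
checks further below). A small C++ model of the value that chain computes,
with a hypothetical LoadPair list standing in for the pass's load sequence:

#include <cstdint>
#include <cstring>
#include <vector>

struct LoadPair { const char *x; const char *y; unsigned bytes; };

int memcmpEqZeroModel(const std::vector<LoadPair> &pairs) {
  uint64_t diff = 0;
  for (const LoadPair &p : pairs) {
    uint64_t a = 0, b = 0;             // narrow loads zero-extended
    std::memcpy(&a, p.x, p.bytes);
    std::memcpy(&b, p.y, p.bytes);
    diff |= a ^ b;                     // xor per pair, or-reduce, no branches
  }
  return diff != 0;                    // 0 means the buffers are equal
}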


@ -829,6 +829,11 @@ namespace llvm {
/// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST.
MVT hasFastEqualityCompare(unsigned NumBits) const override;
/// Allow multiple load pairs per block for smaller and faster code.
unsigned getMemcmpEqZeroLoadsPerBlock() const override {
return 2;
}
/// Return the value type to use for ISD::SETCC.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
EVT VT) const override;
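
A paraphrased model of how the expansion consumes this hook (the names here
are hypothetical, not the pass's exact ones): only equality-against-zero
expansions may pack multiple load pairs per block, while ordered memcmp keeps
one pair per block so an early mismatch can exit quickly. With the x86
override of 2, two load pairs therefore still need no control flow.

unsigned numBlocks(bool IsUsedForZeroCmp, unsigned NumLoads,
                   unsigned EqZeroLoadsPerBlock) {
  unsigned PerBlock = IsUsedForZeroCmp ? EqZeroLoadsPerBlock : 1;
  return (NumLoads + PerBlock - 1) / PerBlock;  // ceil(NumLoads / PerBlock)
}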


@ -160,35 +160,22 @@ define i1 @length3_eq(i8* %X, i8* %Y) nounwind optsize {
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzwl (%ecx), %edx
; X86-NEXT: cmpw (%eax), %dx
; X86-NEXT: jne .LBB5_2
; X86-NEXT: # %bb.1: # %loadbb1
; X86-NEXT: movb 2(%ecx), %dl
; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: cmpb 2(%eax), %dl
; X86-NEXT: je .LBB5_3
; X86-NEXT: .LBB5_2: # %res_block
; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: incl %ecx
; X86-NEXT: .LBB5_3: # %endblock
; X86-NEXT: testl %ecx, %ecx
; X86-NEXT: xorw (%eax), %dx
; X86-NEXT: movb 2(%ecx), %cl
; X86-NEXT: xorb 2(%eax), %cl
; X86-NEXT: movzbl %cl, %eax
; X86-NEXT: orw %dx, %ax
; X86-NEXT: setne %al
; X86-NEXT: retl
;
; X64-LABEL: length3_eq:
; X64: # %bb.0:
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: cmpw (%rsi), %ax
; X64-NEXT: jne .LBB5_2
; X64-NEXT: # %bb.1: # %loadbb1
; X64-NEXT: xorw (%rsi), %ax
; X64-NEXT: movb 2(%rdi), %cl
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpb 2(%rsi), %cl
; X64-NEXT: je .LBB5_3
; X64-NEXT: .LBB5_2: # %res_block
; X64-NEXT: movl $1, %eax
; X64-NEXT: .LBB5_3: # %endblock
; X64-NEXT: testl %eax, %eax
; X64-NEXT: xorb 2(%rsi), %cl
; X64-NEXT: movzbl %cl, %ecx
; X64-NEXT: orw %ax, %cx
; X64-NEXT: setne %al
; X64-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 3) nounwind
@ -318,35 +305,22 @@ define i1 @length5_eq(i8* %X, i8* %Y) nounwind optsize {
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl (%ecx), %edx
; X86-NEXT: cmpl (%eax), %edx
; X86-NEXT: jne .LBB10_2
; X86-NEXT: # %bb.1: # %loadbb1
; X86-NEXT: movb 4(%ecx), %dl
; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: cmpb 4(%eax), %dl
; X86-NEXT: je .LBB10_3
; X86-NEXT: .LBB10_2: # %res_block
; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: incl %ecx
; X86-NEXT: .LBB10_3: # %endblock
; X86-NEXT: testl %ecx, %ecx
; X86-NEXT: xorl (%eax), %edx
; X86-NEXT: movb 4(%ecx), %cl
; X86-NEXT: xorb 4(%eax), %cl
; X86-NEXT: movzbl %cl, %eax
; X86-NEXT: orl %edx, %eax
; X86-NEXT: setne %al
; X86-NEXT: retl
;
; X64-LABEL: length5_eq:
; X64: # %bb.0:
; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: cmpl (%rsi), %eax
; X64-NEXT: jne .LBB10_2
; X64-NEXT: # %bb.1: # %loadbb1
; X64-NEXT: xorl (%rsi), %eax
; X64-NEXT: movb 4(%rdi), %cl
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpb 4(%rsi), %cl
; X64-NEXT: je .LBB10_3
; X64-NEXT: .LBB10_2: # %res_block
; X64-NEXT: movl $1, %eax
; X64-NEXT: .LBB10_3: # %endblock
; X64-NEXT: testl %eax, %eax
; X64-NEXT: xorb 4(%rsi), %cl
; X64-NEXT: movzbl %cl, %ecx
; X64-NEXT: orl %eax, %ecx
; X64-NEXT: setne %al
; X64-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 5) nounwind
@ -404,18 +378,10 @@ define i1 @length8_eq(i8* %X, i8* %Y) nounwind optsize {
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl (%ecx), %edx
; X86-NEXT: cmpl (%eax), %edx
; X86-NEXT: jne .LBB12_2
; X86-NEXT: # %bb.1: # %loadbb1
; X86-NEXT: movl 4(%ecx), %edx
; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: cmpl 4(%eax), %edx
; X86-NEXT: je .LBB12_3
; X86-NEXT: .LBB12_2: # %res_block
; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: incl %ecx
; X86-NEXT: .LBB12_3: # %endblock
; X86-NEXT: testl %ecx, %ecx
; X86-NEXT: movl 4(%ecx), %ecx
; X86-NEXT: xorl (%eax), %edx
; X86-NEXT: xorl 4(%eax), %ecx
; X86-NEXT: orl %edx, %ecx
; X86-NEXT: sete %al
; X86-NEXT: retl
;
@ -433,18 +399,12 @@ define i1 @length8_eq(i8* %X, i8* %Y) nounwind optsize {
define i1 @length8_eq_const(i8* %X) nounwind optsize {
; X86-LABEL: length8_eq_const:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmpl $858927408, (%ecx) # imm = 0x33323130
; X86-NEXT: jne .LBB13_2
; X86-NEXT: # %bb.1: # %loadbb1
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl $926299444, 4(%ecx) # imm = 0x37363534
; X86-NEXT: je .LBB13_3
; X86-NEXT: .LBB13_2: # %res_block
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: incl %eax
; X86-NEXT: .LBB13_3: # %endblock
; X86-NEXT: testl %eax, %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl $858927408, %ecx # imm = 0x33323130
; X86-NEXT: xorl (%eax), %ecx
; X86-NEXT: movl $926299444, %edx # imm = 0x37363534
; X86-NEXT: xorl 4(%eax), %edx
; X86-NEXT: orl %ecx, %edx
; X86-NEXT: setne %al
; X86-NEXT: retl
;
@ -475,17 +435,10 @@ define i1 @length12_eq(i8* %X, i8* %Y) nounwind optsize {
; X64-LABEL: length12_eq:
; X64: # %bb.0:
; X64-NEXT: movq (%rdi), %rax
; X64-NEXT: cmpq (%rsi), %rax
; X64-NEXT: jne .LBB14_2
; X64-NEXT: # %bb.1: # %loadbb1
; X64-NEXT: xorq (%rsi), %rax
; X64-NEXT: movl 8(%rdi), %ecx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpl 8(%rsi), %ecx
; X64-NEXT: je .LBB14_3
; X64-NEXT: .LBB14_2: # %res_block
; X64-NEXT: movl $1, %eax
; X64-NEXT: .LBB14_3: # %endblock
; X64-NEXT: testl %eax, %eax
; X64-NEXT: xorl 8(%rsi), %ecx
; X64-NEXT: orq %rax, %rcx
; X64-NEXT: setne %al
; X64-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 12) nounwind
@ -703,37 +656,25 @@ define i1 @length24_eq(i8* %x, i8* %y) nounwind optsize {
; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
; X64-SSE2-NEXT: movdqu (%rsi), %xmm1
; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
; X64-SSE2-NEXT: pmovmskb %xmm1, %eax
; X64-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; X64-SSE2-NEXT: movq {{.*#+}} xmm2 = mem[0],zero
; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm2
; X64-SSE2-NEXT: pand %xmm1, %xmm2
; X64-SSE2-NEXT: pmovmskb %xmm2, %eax
; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; X64-SSE2-NEXT: jne .LBB20_2
; X64-SSE2-NEXT: # %bb.1: # %loadbb1
; X64-SSE2-NEXT: movq 16(%rdi), %rcx
; X64-SSE2-NEXT: xorl %eax, %eax
; X64-SSE2-NEXT: cmpq 16(%rsi), %rcx
; X64-SSE2-NEXT: je .LBB20_3
; X64-SSE2-NEXT: .LBB20_2: # %res_block
; X64-SSE2-NEXT: movl $1, %eax
; X64-SSE2-NEXT: .LBB20_3: # %endblock
; X64-SSE2-NEXT: testl %eax, %eax
; X64-SSE2-NEXT: sete %al
; X64-SSE2-NEXT: retq
;
; X64-AVX2-LABEL: length24_eq:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: vmovdqu (%rdi), %xmm0
; X64-AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; X64-AVX2-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
; X64-AVX2-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1
; X64-AVX2-NEXT: vpcmpeqb (%rsi), %xmm0, %xmm0
; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpmovmskb %xmm0, %eax
; X64-AVX2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; X64-AVX2-NEXT: jne .LBB20_2
; X64-AVX2-NEXT: # %bb.1: # %loadbb1
; X64-AVX2-NEXT: movq 16(%rdi), %rcx
; X64-AVX2-NEXT: xorl %eax, %eax
; X64-AVX2-NEXT: cmpq 16(%rsi), %rcx
; X64-AVX2-NEXT: je .LBB20_3
; X64-AVX2-NEXT: .LBB20_2: # %res_block
; X64-AVX2-NEXT: movl $1, %eax
; X64-AVX2-NEXT: .LBB20_3: # %endblock
; X64-AVX2-NEXT: testl %eax, %eax
; X64-AVX2-NEXT: sete %al
; X64-AVX2-NEXT: retq
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 24) nounwind
@ -757,38 +698,28 @@ define i1 @length24_eq_const(i8* %X) nounwind optsize {
; X64-SSE2-LABEL: length24_eq_const:
; X64-SSE2: # %bb.0:
; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
; X64-SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; X64-SSE2-NEXT: movabsq $3689065127958034230, %rax # imm = 0x3332313039383736
; X64-SSE2-NEXT: movq %rax, %xmm2
; X64-SSE2-NEXT: pcmpeqb %xmm1, %xmm2
; X64-SSE2-NEXT: pcmpeqb {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT: pand %xmm2, %xmm0
; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; X64-SSE2-NEXT: jne .LBB21_2
; X64-SSE2-NEXT: # %bb.1: # %loadbb1
; X64-SSE2-NEXT: xorl %eax, %eax
; X64-SSE2-NEXT: movabsq $3689065127958034230, %rcx # imm = 0x3332313039383736
; X64-SSE2-NEXT: cmpq %rcx, 16(%rdi)
; X64-SSE2-NEXT: je .LBB21_3
; X64-SSE2-NEXT: .LBB21_2: # %res_block
; X64-SSE2-NEXT: movl $1, %eax
; X64-SSE2-NEXT: .LBB21_3: # %endblock
; X64-SSE2-NEXT: testl %eax, %eax
; X64-SSE2-NEXT: setne %al
; X64-SSE2-NEXT: retq
;
; X64-AVX2-LABEL: length24_eq_const:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: vmovdqu (%rdi), %xmm0
; X64-AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; X64-AVX2-NEXT: movabsq $3689065127958034230, %rax # imm = 0x3332313039383736
; X64-AVX2-NEXT: vmovq %rax, %xmm2
; X64-AVX2-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1
; X64-AVX2-NEXT: vpcmpeqb {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpmovmskb %xmm0, %eax
; X64-AVX2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; X64-AVX2-NEXT: jne .LBB21_2
; X64-AVX2-NEXT: # %bb.1: # %loadbb1
; X64-AVX2-NEXT: xorl %eax, %eax
; X64-AVX2-NEXT: movabsq $3689065127958034230, %rcx # imm = 0x3332313039383736
; X64-AVX2-NEXT: cmpq %rcx, 16(%rdi)
; X64-AVX2-NEXT: je .LBB21_3
; X64-AVX2-NEXT: .LBB21_2: # %res_block
; X64-AVX2-NEXT: movl $1, %eax
; X64-AVX2-NEXT: .LBB21_3: # %endblock
; X64-AVX2-NEXT: testl %eax, %eax
; X64-AVX2-NEXT: setne %al
; X64-AVX2-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 24) nounwind
@ -835,47 +766,28 @@ define i1 @length32_eq(i8* %x, i8* %y) nounwind optsize {
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT: movdqu (%ecx), %xmm0
; X86-SSE2-NEXT: movdqu (%eax), %xmm1
; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
; X86-SSE2-NEXT: pmovmskb %xmm1, %edx
; X86-SSE2-NEXT: cmpl $65535, %edx # imm = 0xFFFF
; X86-SSE2-NEXT: jne .LBB23_2
; X86-SSE2-NEXT: # %bb.1: # %loadbb1
; X86-SSE2-NEXT: movdqu 16(%ecx), %xmm0
; X86-SSE2-NEXT: movdqu 16(%eax), %xmm1
; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
; X86-SSE2-NEXT: pmovmskb %xmm1, %ecx
; X86-SSE2-NEXT: xorl %eax, %eax
; X86-SSE2-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
; X86-SSE2-NEXT: je .LBB23_3
; X86-SSE2-NEXT: .LBB23_2: # %res_block
; X86-SSE2-NEXT: xorl %eax, %eax
; X86-SSE2-NEXT: incl %eax
; X86-SSE2-NEXT: .LBB23_3: # %endblock
; X86-SSE2-NEXT: testl %eax, %eax
; X86-SSE2-NEXT: movdqu 16(%ecx), %xmm1
; X86-SSE2-NEXT: movdqu (%eax), %xmm2
; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm2
; X86-SSE2-NEXT: movdqu 16(%eax), %xmm0
; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm0
; X86-SSE2-NEXT: pand %xmm2, %xmm0
; X86-SSE2-NEXT: pmovmskb %xmm0, %eax
; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; X86-SSE2-NEXT: sete %al
; X86-SSE2-NEXT: retl
;
; X64-SSE2-LABEL: length32_eq:
; X64-SSE2: # %bb.0:
; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
; X64-SSE2-NEXT: movdqu (%rsi), %xmm1
; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
; X64-SSE2-NEXT: pmovmskb %xmm1, %eax
; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm1
; X64-SSE2-NEXT: movdqu (%rsi), %xmm2
; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm2
; X64-SSE2-NEXT: movdqu 16(%rsi), %xmm0
; X64-SSE2-NEXT: pcmpeqb %xmm1, %xmm0
; X64-SSE2-NEXT: pand %xmm2, %xmm0
; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; X64-SSE2-NEXT: jne .LBB23_2
; X64-SSE2-NEXT: # %bb.1: # %loadbb1
; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm0
; X64-SSE2-NEXT: movdqu 16(%rsi), %xmm1
; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
; X64-SSE2-NEXT: pmovmskb %xmm1, %ecx
; X64-SSE2-NEXT: xorl %eax, %eax
; X64-SSE2-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
; X64-SSE2-NEXT: je .LBB23_3
; X64-SSE2-NEXT: .LBB23_2: # %res_block
; X64-SSE2-NEXT: movl $1, %eax
; X64-SSE2-NEXT: .LBB23_3: # %endblock
; X64-SSE2-NEXT: testl %eax, %eax
; X64-SSE2-NEXT: sete %al
; X64-SSE2-NEXT: retq
;
@ -910,43 +822,24 @@ define i1 @length32_eq_const(i8* %X) nounwind optsize {
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: movdqu (%eax), %xmm0
; X86-SSE2-NEXT: movdqu 16(%eax), %xmm1
; X86-SSE2-NEXT: pcmpeqb {{\.LCPI.*}}, %xmm1
; X86-SSE2-NEXT: pcmpeqb {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT: pmovmskb %xmm0, %ecx
; X86-SSE2-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
; X86-SSE2-NEXT: jne .LBB24_2
; X86-SSE2-NEXT: # %bb.1: # %loadbb1
; X86-SSE2-NEXT: movdqu 16(%eax), %xmm0
; X86-SSE2-NEXT: pcmpeqb {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT: pmovmskb %xmm0, %ecx
; X86-SSE2-NEXT: xorl %eax, %eax
; X86-SSE2-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
; X86-SSE2-NEXT: je .LBB24_3
; X86-SSE2-NEXT: .LBB24_2: # %res_block
; X86-SSE2-NEXT: xorl %eax, %eax
; X86-SSE2-NEXT: incl %eax
; X86-SSE2-NEXT: .LBB24_3: # %endblock
; X86-SSE2-NEXT: testl %eax, %eax
; X86-SSE2-NEXT: pand %xmm1, %xmm0
; X86-SSE2-NEXT: pmovmskb %xmm0, %eax
; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; X86-SSE2-NEXT: setne %al
; X86-SSE2-NEXT: retl
;
; X64-SSE2-LABEL: length32_eq_const:
; X64-SSE2: # %bb.0:
; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm1
; X64-SSE2-NEXT: pcmpeqb {{.*}}(%rip), %xmm1
; X64-SSE2-NEXT: pcmpeqb {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT: pand %xmm1, %xmm0
; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; X64-SSE2-NEXT: jne .LBB24_2
; X64-SSE2-NEXT: # %bb.1: # %loadbb1
; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm0
; X64-SSE2-NEXT: pcmpeqb {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT: pmovmskb %xmm0, %ecx
; X64-SSE2-NEXT: xorl %eax, %eax
; X64-SSE2-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
; X64-SSE2-NEXT: je .LBB24_3
; X64-SSE2-NEXT: .LBB24_2: # %res_block
; X64-SSE2-NEXT: movl $1, %eax
; X64-SSE2-NEXT: .LBB24_3: # %endblock
; X64-SSE2-NEXT: testl %eax, %eax
; X64-SSE2-NEXT: setne %al
; X64-SSE2-NEXT: retq
;
@ -1009,21 +902,12 @@ define i1 @length64_eq(i8* %x, i8* %y) nounwind optsize {
; X64-AVX2-LABEL: length64_eq:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
; X64-AVX2-NEXT: vmovdqu 32(%rdi), %ymm1
; X64-AVX2-NEXT: vpcmpeqb 32(%rsi), %ymm1, %ymm1
; X64-AVX2-NEXT: vpcmpeqb (%rsi), %ymm0, %ymm0
; X64-AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT: vpmovmskb %ymm0, %eax
; X64-AVX2-NEXT: cmpl $-1, %eax
; X64-AVX2-NEXT: jne .LBB26_2
; X64-AVX2-NEXT: # %bb.1: # %loadbb1
; X64-AVX2-NEXT: vmovdqu 32(%rdi), %ymm0
; X64-AVX2-NEXT: vpcmpeqb 32(%rsi), %ymm0, %ymm0
; X64-AVX2-NEXT: vpmovmskb %ymm0, %ecx
; X64-AVX2-NEXT: xorl %eax, %eax
; X64-AVX2-NEXT: cmpl $-1, %ecx
; X64-AVX2-NEXT: je .LBB26_3
; X64-AVX2-NEXT: .LBB26_2: # %res_block
; X64-AVX2-NEXT: movl $1, %eax
; X64-AVX2-NEXT: .LBB26_3: # %endblock
; X64-AVX2-NEXT: testl %eax, %eax
; X64-AVX2-NEXT: setne %al
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
@ -1059,21 +943,12 @@ define i1 @length64_eq_const(i8* %X) nounwind optsize {
; X64-AVX2-LABEL: length64_eq_const:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
; X64-AVX2-NEXT: vmovdqu 32(%rdi), %ymm1
; X64-AVX2-NEXT: vpcmpeqb {{.*}}(%rip), %ymm1, %ymm1
; X64-AVX2-NEXT: vpcmpeqb {{.*}}(%rip), %ymm0, %ymm0
; X64-AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT: vpmovmskb %ymm0, %eax
; X64-AVX2-NEXT: cmpl $-1, %eax
; X64-AVX2-NEXT: jne .LBB27_2
; X64-AVX2-NEXT: # %bb.1: # %loadbb1
; X64-AVX2-NEXT: vmovdqu 32(%rdi), %ymm0
; X64-AVX2-NEXT: vpcmpeqb {{.*}}(%rip), %ymm0, %ymm0
; X64-AVX2-NEXT: vpmovmskb %ymm0, %ecx
; X64-AVX2-NEXT: xorl %eax, %eax
; X64-AVX2-NEXT: cmpl $-1, %ecx
; X64-AVX2-NEXT: je .LBB27_3
; X64-AVX2-NEXT: .LBB27_2: # %res_block
; X64-AVX2-NEXT: movl $1, %eax
; X64-AVX2-NEXT: .LBB27_3: # %endblock
; X64-AVX2-NEXT: testl %eax, %eax
; X64-AVX2-NEXT: sete %al
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq


@ -191,34 +191,22 @@ define i1 @length3_eq(i8* %X, i8* %Y) nounwind {
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzwl (%ecx), %edx
; X86-NEXT: cmpw (%eax), %dx
; X86-NEXT: jne .LBB7_2
; X86-NEXT: # %bb.1: # %loadbb1
; X86-NEXT: movb 2(%ecx), %dl
; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: cmpb 2(%eax), %dl
; X86-NEXT: je .LBB7_3
; X86-NEXT: .LBB7_2: # %res_block
; X86-NEXT: movl $1, %ecx
; X86-NEXT: .LBB7_3: # %endblock
; X86-NEXT: testl %ecx, %ecx
; X86-NEXT: xorw (%eax), %dx
; X86-NEXT: movb 2(%ecx), %cl
; X86-NEXT: xorb 2(%eax), %cl
; X86-NEXT: movzbl %cl, %eax
; X86-NEXT: orw %dx, %ax
; X86-NEXT: setne %al
; X86-NEXT: retl
;
; X64-LABEL: length3_eq:
; X64: # %bb.0:
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: cmpw (%rsi), %ax
; X64-NEXT: jne .LBB7_2
; X64-NEXT: # %bb.1: # %loadbb1
; X64-NEXT: xorw (%rsi), %ax
; X64-NEXT: movb 2(%rdi), %cl
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpb 2(%rsi), %cl
; X64-NEXT: je .LBB7_3
; X64-NEXT: .LBB7_2: # %res_block
; X64-NEXT: movl $1, %eax
; X64-NEXT: .LBB7_3: # %endblock
; X64-NEXT: testl %eax, %eax
; X64-NEXT: xorb 2(%rsi), %cl
; X64-NEXT: movzbl %cl, %ecx
; X64-NEXT: orw %ax, %cx
; X64-NEXT: setne %al
; X64-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 3) nounwind
@ -348,34 +336,22 @@ define i1 @length5_eq(i8* %X, i8* %Y) nounwind {
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl (%ecx), %edx
; X86-NEXT: cmpl (%eax), %edx
; X86-NEXT: jne .LBB12_2
; X86-NEXT: # %bb.1: # %loadbb1
; X86-NEXT: movb 4(%ecx), %dl
; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: cmpb 4(%eax), %dl
; X86-NEXT: je .LBB12_3
; X86-NEXT: .LBB12_2: # %res_block
; X86-NEXT: movl $1, %ecx
; X86-NEXT: .LBB12_3: # %endblock
; X86-NEXT: testl %ecx, %ecx
; X86-NEXT: xorl (%eax), %edx
; X86-NEXT: movb 4(%ecx), %cl
; X86-NEXT: xorb 4(%eax), %cl
; X86-NEXT: movzbl %cl, %eax
; X86-NEXT: orl %edx, %eax
; X86-NEXT: setne %al
; X86-NEXT: retl
;
; X64-LABEL: length5_eq:
; X64: # %bb.0:
; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: cmpl (%rsi), %eax
; X64-NEXT: jne .LBB12_2
; X64-NEXT: # %bb.1: # %loadbb1
; X64-NEXT: xorl (%rsi), %eax
; X64-NEXT: movb 4(%rdi), %cl
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpb 4(%rsi), %cl
; X64-NEXT: je .LBB12_3
; X64-NEXT: .LBB12_2: # %res_block
; X64-NEXT: movl $1, %eax
; X64-NEXT: .LBB12_3: # %endblock
; X64-NEXT: testl %eax, %eax
; X64-NEXT: xorb 4(%rsi), %cl
; X64-NEXT: movzbl %cl, %ecx
; X64-NEXT: orl %eax, %ecx
; X64-NEXT: setne %al
; X64-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 5) nounwind
@ -433,17 +409,10 @@ define i1 @length8_eq(i8* %X, i8* %Y) nounwind {
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl (%ecx), %edx
; X86-NEXT: cmpl (%eax), %edx
; X86-NEXT: jne .LBB14_2
; X86-NEXT: # %bb.1: # %loadbb1
; X86-NEXT: movl 4(%ecx), %edx
; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: cmpl 4(%eax), %edx
; X86-NEXT: je .LBB14_3
; X86-NEXT: .LBB14_2: # %res_block
; X86-NEXT: movl $1, %ecx
; X86-NEXT: .LBB14_3: # %endblock
; X86-NEXT: testl %ecx, %ecx
; X86-NEXT: movl 4(%ecx), %ecx
; X86-NEXT: xorl (%eax), %edx
; X86-NEXT: xorl 4(%eax), %ecx
; X86-NEXT: orl %edx, %ecx
; X86-NEXT: sete %al
; X86-NEXT: retl
;
@ -461,17 +430,12 @@ define i1 @length8_eq(i8* %X, i8* %Y) nounwind {
define i1 @length8_eq_const(i8* %X) nounwind {
; X86-LABEL: length8_eq_const:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmpl $858927408, (%ecx) # imm = 0x33323130
; X86-NEXT: jne .LBB15_2
; X86-NEXT: # %bb.1: # %loadbb1
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl $926299444, 4(%ecx) # imm = 0x37363534
; X86-NEXT: je .LBB15_3
; X86-NEXT: .LBB15_2: # %res_block
; X86-NEXT: movl $1, %eax
; X86-NEXT: .LBB15_3: # %endblock
; X86-NEXT: testl %eax, %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl $858927408, %ecx # imm = 0x33323130
; X86-NEXT: xorl (%eax), %ecx
; X86-NEXT: movl $926299444, %edx # imm = 0x37363534
; X86-NEXT: xorl 4(%eax), %edx
; X86-NEXT: orl %ecx, %edx
; X86-NEXT: setne %al
; X86-NEXT: retl
;
@ -502,17 +466,10 @@ define i1 @length12_eq(i8* %X, i8* %Y) nounwind {
; X64-LABEL: length12_eq:
; X64: # %bb.0:
; X64-NEXT: movq (%rdi), %rax
; X64-NEXT: cmpq (%rsi), %rax
; X64-NEXT: jne .LBB16_2
; X64-NEXT: # %bb.1: # %loadbb1
; X64-NEXT: xorq (%rsi), %rax
; X64-NEXT: movl 8(%rdi), %ecx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpl 8(%rsi), %ecx
; X64-NEXT: je .LBB16_3
; X64-NEXT: .LBB16_2: # %res_block
; X64-NEXT: movl $1, %eax
; X64-NEXT: .LBB16_3: # %endblock
; X64-NEXT: testl %eax, %eax
; X64-NEXT: xorl 8(%rsi), %ecx
; X64-NEXT: orq %rax, %rcx
; X64-NEXT: setne %al
; X64-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 12) nounwind
@ -754,37 +711,25 @@ define i1 @length24_eq(i8* %x, i8* %y) nounwind {
; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
; X64-SSE2-NEXT: movdqu (%rsi), %xmm1
; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
; X64-SSE2-NEXT: pmovmskb %xmm1, %eax
; X64-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; X64-SSE2-NEXT: movq {{.*#+}} xmm2 = mem[0],zero
; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm2
; X64-SSE2-NEXT: pand %xmm1, %xmm2
; X64-SSE2-NEXT: pmovmskb %xmm2, %eax
; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; X64-SSE2-NEXT: jne .LBB22_2
; X64-SSE2-NEXT: # %bb.1: # %loadbb1
; X64-SSE2-NEXT: movq 16(%rdi), %rcx
; X64-SSE2-NEXT: xorl %eax, %eax
; X64-SSE2-NEXT: cmpq 16(%rsi), %rcx
; X64-SSE2-NEXT: je .LBB22_3
; X64-SSE2-NEXT: .LBB22_2: # %res_block
; X64-SSE2-NEXT: movl $1, %eax
; X64-SSE2-NEXT: .LBB22_3: # %endblock
; X64-SSE2-NEXT: testl %eax, %eax
; X64-SSE2-NEXT: sete %al
; X64-SSE2-NEXT: retq
;
; X64-AVX-LABEL: length24_eq:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
; X64-AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; X64-AVX-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
; X64-AVX-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1
; X64-AVX-NEXT: vpcmpeqb (%rsi), %xmm0, %xmm0
; X64-AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vpmovmskb %xmm0, %eax
; X64-AVX-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; X64-AVX-NEXT: jne .LBB22_2
; X64-AVX-NEXT: # %bb.1: # %loadbb1
; X64-AVX-NEXT: movq 16(%rdi), %rcx
; X64-AVX-NEXT: xorl %eax, %eax
; X64-AVX-NEXT: cmpq 16(%rsi), %rcx
; X64-AVX-NEXT: je .LBB22_3
; X64-AVX-NEXT: .LBB22_2: # %res_block
; X64-AVX-NEXT: movl $1, %eax
; X64-AVX-NEXT: .LBB22_3: # %endblock
; X64-AVX-NEXT: testl %eax, %eax
; X64-AVX-NEXT: sete %al
; X64-AVX-NEXT: retq
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 24) nounwind
@ -808,38 +753,28 @@ define i1 @length24_eq_const(i8* %X) nounwind {
; X64-SSE2-LABEL: length24_eq_const:
; X64-SSE2: # %bb.0:
; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
; X64-SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; X64-SSE2-NEXT: movabsq $3689065127958034230, %rax # imm = 0x3332313039383736
; X64-SSE2-NEXT: movq %rax, %xmm2
; X64-SSE2-NEXT: pcmpeqb %xmm1, %xmm2
; X64-SSE2-NEXT: pcmpeqb {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT: pand %xmm2, %xmm0
; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; X64-SSE2-NEXT: jne .LBB23_2
; X64-SSE2-NEXT: # %bb.1: # %loadbb1
; X64-SSE2-NEXT: xorl %eax, %eax
; X64-SSE2-NEXT: movabsq $3689065127958034230, %rcx # imm = 0x3332313039383736
; X64-SSE2-NEXT: cmpq %rcx, 16(%rdi)
; X64-SSE2-NEXT: je .LBB23_3
; X64-SSE2-NEXT: .LBB23_2: # %res_block
; X64-SSE2-NEXT: movl $1, %eax
; X64-SSE2-NEXT: .LBB23_3: # %endblock
; X64-SSE2-NEXT: testl %eax, %eax
; X64-SSE2-NEXT: setne %al
; X64-SSE2-NEXT: retq
;
; X64-AVX-LABEL: length24_eq_const:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
; X64-AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; X64-AVX-NEXT: movabsq $3689065127958034230, %rax # imm = 0x3332313039383736
; X64-AVX-NEXT: vmovq %rax, %xmm2
; X64-AVX-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1
; X64-AVX-NEXT: vpcmpeqb {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vpmovmskb %xmm0, %eax
; X64-AVX-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; X64-AVX-NEXT: jne .LBB23_2
; X64-AVX-NEXT: # %bb.1: # %loadbb1
; X64-AVX-NEXT: xorl %eax, %eax
; X64-AVX-NEXT: movabsq $3689065127958034230, %rcx # imm = 0x3332313039383736
; X64-AVX-NEXT: cmpq %rcx, 16(%rdi)
; X64-AVX-NEXT: je .LBB23_3
; X64-AVX-NEXT: .LBB23_2: # %res_block
; X64-AVX-NEXT: movl $1, %eax
; X64-AVX-NEXT: .LBB23_3: # %endblock
; X64-AVX-NEXT: testl %eax, %eax
; X64-AVX-NEXT: setne %al
; X64-AVX-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 24) nounwind
@ -898,67 +833,40 @@ define i1 @length32_eq(i8* %x, i8* %y) nounwind {
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT: movdqu (%ecx), %xmm0
; X86-SSE2-NEXT: movdqu (%eax), %xmm1
; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
; X86-SSE2-NEXT: pmovmskb %xmm1, %edx
; X86-SSE2-NEXT: cmpl $65535, %edx # imm = 0xFFFF
; X86-SSE2-NEXT: jne .LBB25_2
; X86-SSE2-NEXT: # %bb.1: # %loadbb1
; X86-SSE2-NEXT: movdqu 16(%ecx), %xmm0
; X86-SSE2-NEXT: movdqu 16(%eax), %xmm1
; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
; X86-SSE2-NEXT: pmovmskb %xmm1, %ecx
; X86-SSE2-NEXT: xorl %eax, %eax
; X86-SSE2-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
; X86-SSE2-NEXT: je .LBB25_3
; X86-SSE2-NEXT: .LBB25_2: # %res_block
; X86-SSE2-NEXT: movl $1, %eax
; X86-SSE2-NEXT: .LBB25_3: # %endblock
; X86-SSE2-NEXT: testl %eax, %eax
; X86-SSE2-NEXT: movdqu 16(%ecx), %xmm1
; X86-SSE2-NEXT: movdqu (%eax), %xmm2
; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm2
; X86-SSE2-NEXT: movdqu 16(%eax), %xmm0
; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm0
; X86-SSE2-NEXT: pand %xmm2, %xmm0
; X86-SSE2-NEXT: pmovmskb %xmm0, %eax
; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; X86-SSE2-NEXT: sete %al
; X86-SSE2-NEXT: retl
;
; X64-SSE2-LABEL: length32_eq:
; X64-SSE2: # %bb.0:
; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
; X64-SSE2-NEXT: movdqu (%rsi), %xmm1
; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
; X64-SSE2-NEXT: pmovmskb %xmm1, %eax
; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm1
; X64-SSE2-NEXT: movdqu (%rsi), %xmm2
; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm2
; X64-SSE2-NEXT: movdqu 16(%rsi), %xmm0
; X64-SSE2-NEXT: pcmpeqb %xmm1, %xmm0
; X64-SSE2-NEXT: pand %xmm2, %xmm0
; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; X64-SSE2-NEXT: jne .LBB25_2
; X64-SSE2-NEXT: # %bb.1: # %loadbb1
; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm0
; X64-SSE2-NEXT: movdqu 16(%rsi), %xmm1
; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
; X64-SSE2-NEXT: pmovmskb %xmm1, %ecx
; X64-SSE2-NEXT: xorl %eax, %eax
; X64-SSE2-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
; X64-SSE2-NEXT: je .LBB25_3
; X64-SSE2-NEXT: .LBB25_2: # %res_block
; X64-SSE2-NEXT: movl $1, %eax
; X64-SSE2-NEXT: .LBB25_3: # %endblock
; X64-SSE2-NEXT: testl %eax, %eax
; X64-SSE2-NEXT: sete %al
; X64-SSE2-NEXT: retq
;
; X64-AVX1-LABEL: length32_eq:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovdqu (%rdi), %xmm0
; X64-AVX1-NEXT: vmovdqu 16(%rdi), %xmm1
; X64-AVX1-NEXT: vpcmpeqb 16(%rsi), %xmm1, %xmm1
; X64-AVX1-NEXT: vpcmpeqb (%rsi), %xmm0, %xmm0
; X64-AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpmovmskb %xmm0, %eax
; X64-AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; X64-AVX1-NEXT: jne .LBB25_2
; X64-AVX1-NEXT: # %bb.1: # %loadbb1
; X64-AVX1-NEXT: vmovdqu 16(%rdi), %xmm0
; X64-AVX1-NEXT: vpcmpeqb 16(%rsi), %xmm0, %xmm0
; X64-AVX1-NEXT: vpmovmskb %xmm0, %ecx
; X64-AVX1-NEXT: xorl %eax, %eax
; X64-AVX1-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
; X64-AVX1-NEXT: je .LBB25_3
; X64-AVX1-NEXT: .LBB25_2: # %res_block
; X64-AVX1-NEXT: movl $1, %eax
; X64-AVX1-NEXT: .LBB25_3: # %endblock
; X64-AVX1-NEXT: testl %eax, %eax
; X64-AVX1-NEXT: sete %al
; X64-AVX1-NEXT: retq
;
@ -1005,63 +913,36 @@ define i1 @length32_eq_const(i8* %X) nounwind {
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: movdqu (%eax), %xmm0
; X86-SSE2-NEXT: movdqu 16(%eax), %xmm1
; X86-SSE2-NEXT: pcmpeqb {{\.LCPI.*}}, %xmm1
; X86-SSE2-NEXT: pcmpeqb {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT: pmovmskb %xmm0, %ecx
; X86-SSE2-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
; X86-SSE2-NEXT: jne .LBB26_2
; X86-SSE2-NEXT: # %bb.1: # %loadbb1
; X86-SSE2-NEXT: movdqu 16(%eax), %xmm0
; X86-SSE2-NEXT: pcmpeqb {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT: pmovmskb %xmm0, %ecx
; X86-SSE2-NEXT: xorl %eax, %eax
; X86-SSE2-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
; X86-SSE2-NEXT: je .LBB26_3
; X86-SSE2-NEXT: .LBB26_2: # %res_block
; X86-SSE2-NEXT: movl $1, %eax
; X86-SSE2-NEXT: .LBB26_3: # %endblock
; X86-SSE2-NEXT: testl %eax, %eax
; X86-SSE2-NEXT: pand %xmm1, %xmm0
; X86-SSE2-NEXT: pmovmskb %xmm0, %eax
; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; X86-SSE2-NEXT: setne %al
; X86-SSE2-NEXT: retl
;
; X64-SSE2-LABEL: length32_eq_const:
; X64-SSE2: # %bb.0:
; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm1
; X64-SSE2-NEXT: pcmpeqb {{.*}}(%rip), %xmm1
; X64-SSE2-NEXT: pcmpeqb {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT: pand %xmm1, %xmm0
; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; X64-SSE2-NEXT: jne .LBB26_2
; X64-SSE2-NEXT: # %bb.1: # %loadbb1
; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm0
; X64-SSE2-NEXT: pcmpeqb {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT: pmovmskb %xmm0, %ecx
; X64-SSE2-NEXT: xorl %eax, %eax
; X64-SSE2-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
; X64-SSE2-NEXT: je .LBB26_3
; X64-SSE2-NEXT: .LBB26_2: # %res_block
; X64-SSE2-NEXT: movl $1, %eax
; X64-SSE2-NEXT: .LBB26_3: # %endblock
; X64-SSE2-NEXT: testl %eax, %eax
; X64-SSE2-NEXT: setne %al
; X64-SSE2-NEXT: retq
;
; X64-AVX1-LABEL: length32_eq_const:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovdqu (%rdi), %xmm0
; X64-AVX1-NEXT: vmovdqu 16(%rdi), %xmm1
; X64-AVX1-NEXT: vpcmpeqb {{.*}}(%rip), %xmm1, %xmm1
; X64-AVX1-NEXT: vpcmpeqb {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpmovmskb %xmm0, %eax
; X64-AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; X64-AVX1-NEXT: jne .LBB26_2
; X64-AVX1-NEXT: # %bb.1: # %loadbb1
; X64-AVX1-NEXT: vmovdqu 16(%rdi), %xmm0
; X64-AVX1-NEXT: vpcmpeqb {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT: vpmovmskb %xmm0, %ecx
; X64-AVX1-NEXT: xorl %eax, %eax
; X64-AVX1-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
; X64-AVX1-NEXT: je .LBB26_3
; X64-AVX1-NEXT: .LBB26_2: # %res_block
; X64-AVX1-NEXT: movl $1, %eax
; X64-AVX1-NEXT: .LBB26_3: # %endblock
; X64-AVX1-NEXT: testl %eax, %eax
; X64-AVX1-NEXT: setne %al
; X64-AVX1-NEXT: retq
;
@ -1134,21 +1015,12 @@ define i1 @length64_eq(i8* %x, i8* %y) nounwind {
; X64-AVX2-LABEL: length64_eq:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
; X64-AVX2-NEXT: vmovdqu 32(%rdi), %ymm1
; X64-AVX2-NEXT: vpcmpeqb 32(%rsi), %ymm1, %ymm1
; X64-AVX2-NEXT: vpcmpeqb (%rsi), %ymm0, %ymm0
; X64-AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT: vpmovmskb %ymm0, %eax
; X64-AVX2-NEXT: cmpl $-1, %eax
; X64-AVX2-NEXT: jne .LBB28_2
; X64-AVX2-NEXT: # %bb.1: # %loadbb1
; X64-AVX2-NEXT: vmovdqu 32(%rdi), %ymm0
; X64-AVX2-NEXT: vpcmpeqb 32(%rsi), %ymm0, %ymm0
; X64-AVX2-NEXT: vpmovmskb %ymm0, %ecx
; X64-AVX2-NEXT: xorl %eax, %eax
; X64-AVX2-NEXT: cmpl $-1, %ecx
; X64-AVX2-NEXT: je .LBB28_3
; X64-AVX2-NEXT: .LBB28_2: # %res_block
; X64-AVX2-NEXT: movl $1, %eax
; X64-AVX2-NEXT: .LBB28_3: # %endblock
; X64-AVX2-NEXT: testl %eax, %eax
; X64-AVX2-NEXT: setne %al
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
@ -1195,21 +1067,12 @@ define i1 @length64_eq_const(i8* %X) nounwind {
; X64-AVX2-LABEL: length64_eq_const:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
; X64-AVX2-NEXT: vmovdqu 32(%rdi), %ymm1
; X64-AVX2-NEXT: vpcmpeqb {{.*}}(%rip), %ymm1, %ymm1
; X64-AVX2-NEXT: vpcmpeqb {{.*}}(%rip), %ymm0, %ymm0
; X64-AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT: vpmovmskb %ymm0, %eax
; X64-AVX2-NEXT: cmpl $-1, %eax
; X64-AVX2-NEXT: jne .LBB29_2
; X64-AVX2-NEXT: # %bb.1: # %loadbb1
; X64-AVX2-NEXT: vmovdqu 32(%rdi), %ymm0
; X64-AVX2-NEXT: vpcmpeqb {{.*}}(%rip), %ymm0, %ymm0
; X64-AVX2-NEXT: vpmovmskb %ymm0, %ecx
; X64-AVX2-NEXT: xorl %eax, %eax
; X64-AVX2-NEXT: cmpl $-1, %ecx
; X64-AVX2-NEXT: je .LBB29_3
; X64-AVX2-NEXT: .LBB29_2: # %res_block
; X64-AVX2-NEXT: movl $1, %eax
; X64-AVX2-NEXT: .LBB29_3: # %endblock
; X64-AVX2-NEXT: testl %eax, %eax
; X64-AVX2-NEXT: sete %al
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq


@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -expandmemcmp -mtriple=i686-unknown-unknown -data-layout=e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128 < %s | FileCheck %s --check-prefix=ALL --check-prefix=X32
; RUN: opt -S -expandmemcmp -mtriple=x86_64-unknown-unknown -data-layout=e-m:o-i64:64-f80:128-n8:16:32:64-S128 < %s | FileCheck %s --check-prefix=ALL --check-prefix=X64
; RUN: opt -S -expandmemcmp -memcmp-num-loads-per-block=1 -mtriple=x86_64-unknown-unknown -data-layout=e-m:o-i64:64-f80:128-n8:16:32:64-S128 < %s | FileCheck %s --check-prefix=ALL --check-prefix=X64 --check-prefix=X64_1LD
; RUN: opt -S -expandmemcmp -memcmp-num-loads-per-block=2 -mtriple=x86_64-unknown-unknown -data-layout=e-m:o-i64:64-f80:128-n8:16:32:64-S128 < %s | FileCheck %s --check-prefix=ALL --check-prefix=X64 --check-prefix=X64_2LD
declare i32 @memcmp(i8* nocapture, i8* nocapture, i64)
@ -430,29 +431,69 @@ define i32 @cmp_eq2(i8* nocapture readonly %x, i8* nocapture readonly %y) {
}
define i32 @cmp_eq3(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; ALL-LABEL: @cmp_eq3(
; ALL-NEXT: br label [[LOADBB:%.*]]
; ALL: res_block:
; ALL-NEXT: br label [[ENDBLOCK:%.*]]
; ALL: loadbb:
; ALL-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i16*
; ALL-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i16*
; ALL-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]]
; ALL-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP2]]
; ALL-NEXT: [[TMP5:%.*]] = icmp ne i16 [[TMP3]], [[TMP4]]
; ALL-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
; ALL: loadbb1:
; ALL-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 2
; ALL-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i8 2
; ALL-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]]
; ALL-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]]
; ALL-NEXT: [[TMP10:%.*]] = icmp ne i8 [[TMP8]], [[TMP9]]
; ALL-NEXT: br i1 [[TMP10]], label [[RES_BLOCK]], label [[ENDBLOCK]]
; ALL: endblock:
; ALL-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
; ALL-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; ALL-NEXT: ret i32 [[CONV]]
; X32-LABEL: @cmp_eq3(
; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i16*
; X32-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i16*
; X32-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]]
; X32-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP2]]
; X32-NEXT: [[TMP5:%.*]] = xor i16 [[TMP3]], [[TMP4]]
; X32-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 2
; X32-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i8 2
; X32-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]]
; X32-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]]
; X32-NEXT: [[TMP10:%.*]] = zext i8 [[TMP8]] to i16
; X32-NEXT: [[TMP11:%.*]] = zext i8 [[TMP9]] to i16
; X32-NEXT: [[TMP12:%.*]] = xor i16 [[TMP10]], [[TMP11]]
; X32-NEXT: [[TMP13:%.*]] = or i16 [[TMP5]], [[TMP12]]
; X32-NEXT: [[TMP14:%.*]] = icmp ne i16 [[TMP13]], 0
; X32-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X32-NEXT: ret i32 [[CONV]]
;
; X64_1LD-LABEL: @cmp_eq3(
; X64_1LD-NEXT: br label [[LOADBB:%.*]]
; X64_1LD: res_block:
; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]
; X64_1LD: loadbb:
; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i16*
; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i16*
; X64_1LD-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]]
; X64_1LD-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP2]]
; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i16 [[TMP3]], [[TMP4]]
; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
; X64_1LD: loadbb1:
; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 2
; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i8 2
; X64_1LD-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]]
; X64_1LD-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]]
; X64_1LD-NEXT: [[TMP10:%.*]] = icmp ne i8 [[TMP8]], [[TMP9]]
; X64_1LD-NEXT: br i1 [[TMP10]], label [[RES_BLOCK]], label [[ENDBLOCK]]
; X64_1LD: endblock:
; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X64_1LD-NEXT: ret i32 [[CONV]]
;
; X64_2LD-LABEL: @cmp_eq3(
; X64_2LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i16*
; X64_2LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i16*
; X64_2LD-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]]
; X64_2LD-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP2]]
; X64_2LD-NEXT: [[TMP5:%.*]] = xor i16 [[TMP3]], [[TMP4]]
; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 2
; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i8 2
; X64_2LD-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]]
; X64_2LD-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]]
; X64_2LD-NEXT: [[TMP10:%.*]] = zext i8 [[TMP8]] to i16
; X64_2LD-NEXT: [[TMP11:%.*]] = zext i8 [[TMP9]] to i16
; X64_2LD-NEXT: [[TMP12:%.*]] = xor i16 [[TMP10]], [[TMP11]]
; X64_2LD-NEXT: [[TMP13:%.*]] = or i16 [[TMP5]], [[TMP12]]
; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i16 [[TMP13]], 0
; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X64_2LD-NEXT: ret i32 [[CONV]]
;
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 3)
%cmp = icmp eq i32 %call, 0
@ -479,29 +520,69 @@ define i32 @cmp_eq4(i8* nocapture readonly %x, i8* nocapture readonly %y) {
}
define i32 @cmp_eq5(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; ALL-LABEL: @cmp_eq5(
; ALL-NEXT: br label [[LOADBB:%.*]]
; ALL: res_block:
; ALL-NEXT: br label [[ENDBLOCK:%.*]]
; ALL: loadbb:
; ALL-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
; ALL-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
; ALL-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
; ALL-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
; ALL-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]]
; ALL-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
; ALL: loadbb1:
; ALL-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 4
; ALL-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i8 4
; ALL-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]]
; ALL-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]]
; ALL-NEXT: [[TMP10:%.*]] = icmp ne i8 [[TMP8]], [[TMP9]]
; ALL-NEXT: br i1 [[TMP10]], label [[RES_BLOCK]], label [[ENDBLOCK]]
; ALL: endblock:
; ALL-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
; ALL-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; ALL-NEXT: ret i32 [[CONV]]
; X32-LABEL: @cmp_eq5(
; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
; X32-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
; X32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
; X32-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
; X32-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 4
; X32-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i8 4
; X32-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]]
; X32-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]]
; X32-NEXT: [[TMP10:%.*]] = zext i8 [[TMP8]] to i32
; X32-NEXT: [[TMP11:%.*]] = zext i8 [[TMP9]] to i32
; X32-NEXT: [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]]
; X32-NEXT: [[TMP13:%.*]] = or i32 [[TMP5]], [[TMP12]]
; X32-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
; X32-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X32-NEXT: ret i32 [[CONV]]
;
; X64_1LD-LABEL: @cmp_eq5(
; X64_1LD-NEXT: br label [[LOADBB:%.*]]
; X64_1LD: res_block:
; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]
; X64_1LD: loadbb:
; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
; X64_1LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
; X64_1LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]]
; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
; X64_1LD: loadbb1:
; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 4
; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i8 4
; X64_1LD-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]]
; X64_1LD-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]]
; X64_1LD-NEXT: [[TMP10:%.*]] = icmp ne i8 [[TMP8]], [[TMP9]]
; X64_1LD-NEXT: br i1 [[TMP10]], label [[RES_BLOCK]], label [[ENDBLOCK]]
; X64_1LD: endblock:
; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X64_1LD-NEXT: ret i32 [[CONV]]
;
; X64_2LD-LABEL: @cmp_eq5(
; X64_2LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
; X64_2LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
; X64_2LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
; X64_2LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
; X64_2LD-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 4
; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i8 4
; X64_2LD-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]]
; X64_2LD-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]]
; X64_2LD-NEXT: [[TMP10:%.*]] = zext i8 [[TMP8]] to i32
; X64_2LD-NEXT: [[TMP11:%.*]] = zext i8 [[TMP9]] to i32
; X64_2LD-NEXT: [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]]
; X64_2LD-NEXT: [[TMP13:%.*]] = or i32 [[TMP5]], [[TMP12]]
; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X64_2LD-NEXT: ret i32 [[CONV]]
;
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 5)
%cmp = icmp eq i32 %call, 0
@ -510,31 +591,75 @@ define i32 @cmp_eq5(i8* nocapture readonly %x, i8* nocapture readonly %y) {
}
define i32 @cmp_eq6(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; ALL-LABEL: @cmp_eq6(
; ALL-NEXT: br label [[LOADBB:%.*]]
; ALL: res_block:
; ALL-NEXT: br label [[ENDBLOCK:%.*]]
; ALL: loadbb:
; ALL-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
; ALL-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
; ALL-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
; ALL-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
; ALL-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]]
; ALL-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
; ALL: loadbb1:
; ALL-NEXT: [[TMP6:%.*]] = bitcast i8* [[X]] to i16*
; ALL-NEXT: [[TMP7:%.*]] = bitcast i8* [[Y]] to i16*
; ALL-NEXT: [[TMP8:%.*]] = getelementptr i16, i16* [[TMP6]], i16 2
; ALL-NEXT: [[TMP9:%.*]] = getelementptr i16, i16* [[TMP7]], i16 2
; ALL-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]]
; ALL-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]]
; ALL-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP10]], [[TMP11]]
; ALL-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
; ALL: endblock:
; ALL-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
; ALL-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; ALL-NEXT: ret i32 [[CONV]]
; X32-LABEL: @cmp_eq6(
; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
; X32-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
; X32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
; X32-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
; X32-NEXT: [[TMP6:%.*]] = bitcast i8* [[X]] to i16*
; X32-NEXT: [[TMP7:%.*]] = bitcast i8* [[Y]] to i16*
; X32-NEXT: [[TMP8:%.*]] = getelementptr i16, i16* [[TMP6]], i16 2
; X32-NEXT: [[TMP9:%.*]] = getelementptr i16, i16* [[TMP7]], i16 2
; X32-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]]
; X32-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]]
; X32-NEXT: [[TMP12:%.*]] = zext i16 [[TMP10]] to i32
; X32-NEXT: [[TMP13:%.*]] = zext i16 [[TMP11]] to i32
; X32-NEXT: [[TMP14:%.*]] = xor i32 [[TMP12]], [[TMP13]]
; X32-NEXT: [[TMP15:%.*]] = or i32 [[TMP5]], [[TMP14]]
; X32-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0
; X32-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0
; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X32-NEXT: ret i32 [[CONV]]
;
; X64_1LD-LABEL: @cmp_eq6(
; X64_1LD-NEXT: br label [[LOADBB:%.*]]
; X64_1LD: res_block:
; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]
; X64_1LD: loadbb:
; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
; X64_1LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
; X64_1LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]]
; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
; X64_1LD: loadbb1:
; X64_1LD-NEXT: [[TMP6:%.*]] = bitcast i8* [[X]] to i16*
; X64_1LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[Y]] to i16*
; X64_1LD-NEXT: [[TMP8:%.*]] = getelementptr i16, i16* [[TMP6]], i16 2
; X64_1LD-NEXT: [[TMP9:%.*]] = getelementptr i16, i16* [[TMP7]], i16 2
; X64_1LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]]
; X64_1LD-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]]
; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP10]], [[TMP11]]
; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
; X64_1LD: endblock:
; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X64_1LD-NEXT: ret i32 [[CONV]]
;
; X64_2LD-LABEL: @cmp_eq6(
; X64_2LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
; X64_2LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
; X64_2LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
; X64_2LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
; X64_2LD-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
; X64_2LD-NEXT: [[TMP6:%.*]] = bitcast i8* [[X]] to i16*
; X64_2LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[Y]] to i16*
; X64_2LD-NEXT: [[TMP8:%.*]] = getelementptr i16, i16* [[TMP6]], i16 2
; X64_2LD-NEXT: [[TMP9:%.*]] = getelementptr i16, i16* [[TMP7]], i16 2
; X64_2LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]]
; X64_2LD-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]]
; X64_2LD-NEXT: [[TMP12:%.*]] = zext i16 [[TMP10]] to i32
; X64_2LD-NEXT: [[TMP13:%.*]] = zext i16 [[TMP11]] to i32
; X64_2LD-NEXT: [[TMP14:%.*]] = xor i32 [[TMP12]], [[TMP13]]
; X64_2LD-NEXT: [[TMP15:%.*]] = or i32 [[TMP5]], [[TMP14]]
; X64_2LD-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0
; X64_2LD-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0
; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X64_2LD-NEXT: ret i32 [[CONV]]
;
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 6)
%cmp = icmp eq i32 %call, 0
@ -557,28 +682,22 @@ define i32 @cmp_eq7(i8* nocapture readonly %x, i8* nocapture readonly %y) {
define i32 @cmp_eq8(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X32-LABEL: @cmp_eq8(
; X32-NEXT: br label [[LOADBB:%.*]]
; X32: res_block:
; X32-NEXT: br label [[ENDBLOCK:%.*]]
; X32: loadbb:
; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
; X32-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
; X32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
; X32-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]]
; X32-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
; X32: loadbb1:
; X32-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
; X32-NEXT: [[TMP6:%.*]] = bitcast i8* [[X]] to i32*
; X32-NEXT: [[TMP7:%.*]] = bitcast i8* [[Y]] to i32*
; X32-NEXT: [[TMP8:%.*]] = getelementptr i32, i32* [[TMP6]], i32 1
; X32-NEXT: [[TMP9:%.*]] = getelementptr i32, i32* [[TMP7]], i32 1
; X32-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]]
; X32-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]]
; X32-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP10]], [[TMP11]]
; X32-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
; X32: endblock:
; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
; X32-NEXT: [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]]
; X32-NEXT: [[TMP13:%.*]] = or i32 [[TMP5]], [[TMP12]]
; X32-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
; X32-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X32-NEXT: ret i32 [[CONV]]
;
@ -606,29 +725,49 @@ define i32 @cmp_eq9(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X32-NEXT: ret i32 [[CONV]]
;
; X64-LABEL: @cmp_eq9(
; X64-NEXT: br label [[LOADBB:%.*]]
; X64: res_block:
; X64-NEXT: br label [[ENDBLOCK:%.*]]
; X64: loadbb:
; X64-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
; X64-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
; X64-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
; X64-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
; X64-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
; X64-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
; X64: loadbb1:
; X64-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 8
; X64-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i8 8
; X64-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]]
; X64-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]]
; X64-NEXT: [[TMP10:%.*]] = icmp ne i8 [[TMP8]], [[TMP9]]
; X64-NEXT: br i1 [[TMP10]], label [[RES_BLOCK]], label [[ENDBLOCK]]
; X64: endblock:
; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
; X64-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X64-NEXT: ret i32 [[CONV]]
; X64_1LD-LABEL: @cmp_eq9(
; X64_1LD-NEXT: br label [[LOADBB:%.*]]
; X64_1LD: res_block:
; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]
; X64_1LD: loadbb:
; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
; X64_1LD: loadbb1:
; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 8
; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i8 8
; X64_1LD-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]]
; X64_1LD-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]]
; X64_1LD-NEXT: [[TMP10:%.*]] = icmp ne i8 [[TMP8]], [[TMP9]]
; X64_1LD-NEXT: br i1 [[TMP10]], label [[RES_BLOCK]], label [[ENDBLOCK]]
; X64_1LD: endblock:
; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X64_1LD-NEXT: ret i32 [[CONV]]
;
; X64_2LD-LABEL: @cmp_eq9(
; X64_2LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
; X64_2LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 8
; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i8 8
; X64_2LD-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]]
; X64_2LD-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]]
; X64_2LD-NEXT: [[TMP10:%.*]] = zext i8 [[TMP8]] to i64
; X64_2LD-NEXT: [[TMP11:%.*]] = zext i8 [[TMP9]] to i64
; X64_2LD-NEXT: [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]]
; X64_2LD-NEXT: [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]]
; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X64_2LD-NEXT: ret i32 [[CONV]]
;
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 9)
%cmp = icmp eq i32 %call, 0
@ -643,31 +782,53 @@ define i32 @cmp_eq10(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X32-NEXT: ret i32 [[CONV]]
;
; X64-LABEL: @cmp_eq10(
; X64-NEXT: br label [[LOADBB:%.*]]
; X64: res_block:
; X64-NEXT: br label [[ENDBLOCK:%.*]]
; X64: loadbb:
; X64-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
; X64-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
; X64-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
; X64-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
; X64-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
; X64-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
; X64: loadbb1:
; X64-NEXT: [[TMP6:%.*]] = bitcast i8* [[X]] to i16*
; X64-NEXT: [[TMP7:%.*]] = bitcast i8* [[Y]] to i16*
; X64-NEXT: [[TMP8:%.*]] = getelementptr i16, i16* [[TMP6]], i16 4
; X64-NEXT: [[TMP9:%.*]] = getelementptr i16, i16* [[TMP7]], i16 4
; X64-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]]
; X64-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]]
; X64-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP10]], [[TMP11]]
; X64-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
; X64: endblock:
; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
; X64-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X64-NEXT: ret i32 [[CONV]]
; X64_1LD-LABEL: @cmp_eq10(
; X64_1LD-NEXT: br label [[LOADBB:%.*]]
; X64_1LD: res_block:
; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]
; X64_1LD: loadbb:
; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
; X64_1LD: loadbb1:
; X64_1LD-NEXT: [[TMP6:%.*]] = bitcast i8* [[X]] to i16*
; X64_1LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[Y]] to i16*
; X64_1LD-NEXT: [[TMP8:%.*]] = getelementptr i16, i16* [[TMP6]], i16 4
; X64_1LD-NEXT: [[TMP9:%.*]] = getelementptr i16, i16* [[TMP7]], i16 4
; X64_1LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]]
; X64_1LD-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]]
; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP10]], [[TMP11]]
; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
; X64_1LD: endblock:
; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X64_1LD-NEXT: ret i32 [[CONV]]
;
; X64_2LD-LABEL: @cmp_eq10(
; X64_2LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
; X64_2LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
; X64_2LD-NEXT: [[TMP6:%.*]] = bitcast i8* [[X]] to i16*
; X64_2LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[Y]] to i16*
; X64_2LD-NEXT: [[TMP8:%.*]] = getelementptr i16, i16* [[TMP6]], i16 4
; X64_2LD-NEXT: [[TMP9:%.*]] = getelementptr i16, i16* [[TMP7]], i16 4
; X64_2LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]]
; X64_2LD-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]]
; X64_2LD-NEXT: [[TMP12:%.*]] = zext i16 [[TMP10]] to i64
; X64_2LD-NEXT: [[TMP13:%.*]] = zext i16 [[TMP11]] to i64
; X64_2LD-NEXT: [[TMP14:%.*]] = xor i64 [[TMP12]], [[TMP13]]
; X64_2LD-NEXT: [[TMP15:%.*]] = or i64 [[TMP5]], [[TMP14]]
; X64_2LD-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP15]], 0
; X64_2LD-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0
; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X64_2LD-NEXT: ret i32 [[CONV]]
;
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 10)
%cmp = icmp eq i32 %call, 0
@ -695,31 +856,53 @@ define i32 @cmp_eq12(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X32-NEXT: ret i32 [[CONV]]
;
; X64-LABEL: @cmp_eq12(
; X64-NEXT: br label [[LOADBB:%.*]]
; X64: res_block:
; X64-NEXT: br label [[ENDBLOCK:%.*]]
; X64: loadbb:
; X64-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
; X64-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
; X64-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
; X64-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
; X64-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
; X64-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
; X64: loadbb1:
; X64-NEXT: [[TMP6:%.*]] = bitcast i8* [[X]] to i32*
; X64-NEXT: [[TMP7:%.*]] = bitcast i8* [[Y]] to i32*
; X64-NEXT: [[TMP8:%.*]] = getelementptr i32, i32* [[TMP6]], i32 2
; X64-NEXT: [[TMP9:%.*]] = getelementptr i32, i32* [[TMP7]], i32 2
; X64-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]]
; X64-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]]
; X64-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP10]], [[TMP11]]
; X64-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
; X64: endblock:
; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
; X64-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X64-NEXT: ret i32 [[CONV]]
; X64_1LD-LABEL: @cmp_eq12(
; X64_1LD-NEXT: br label [[LOADBB:%.*]]
; X64_1LD: res_block:
; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]
; X64_1LD: loadbb:
; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
; X64_1LD: loadbb1:
; X64_1LD-NEXT: [[TMP6:%.*]] = bitcast i8* [[X]] to i32*
; X64_1LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[Y]] to i32*
; X64_1LD-NEXT: [[TMP8:%.*]] = getelementptr i32, i32* [[TMP6]], i32 2
; X64_1LD-NEXT: [[TMP9:%.*]] = getelementptr i32, i32* [[TMP7]], i32 2
; X64_1LD-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]]
; X64_1LD-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]]
; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP10]], [[TMP11]]
; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
; X64_1LD: endblock:
; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X64_1LD-NEXT: ret i32 [[CONV]]
;
; X64_2LD-LABEL: @cmp_eq12(
; X64_2LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
; X64_2LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
; X64_2LD-NEXT: [[TMP6:%.*]] = bitcast i8* [[X]] to i32*
; X64_2LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[Y]] to i32*
; X64_2LD-NEXT: [[TMP8:%.*]] = getelementptr i32, i32* [[TMP6]], i32 2
; X64_2LD-NEXT: [[TMP9:%.*]] = getelementptr i32, i32* [[TMP7]], i32 2
; X64_2LD-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]]
; X64_2LD-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]]
; X64_2LD-NEXT: [[TMP12:%.*]] = zext i32 [[TMP10]] to i64
; X64_2LD-NEXT: [[TMP13:%.*]] = zext i32 [[TMP11]] to i64
; X64_2LD-NEXT: [[TMP14:%.*]] = xor i64 [[TMP12]], [[TMP13]]
; X64_2LD-NEXT: [[TMP15:%.*]] = or i64 [[TMP5]], [[TMP14]]
; X64_2LD-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP15]], 0
; X64_2LD-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0
; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X64_2LD-NEXT: ret i32 [[CONV]]
;
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 12)
%cmp = icmp eq i32 %call, 0