[X86] Zero-extend pointers to i64 for x86_64

For LP64 mode, this has no effect as pointers are already 64 bits. For ILP32 mode (x32), this extension is specified by the ABI.

Reviewed By: pengfei

Differential Revision: https://reviews.llvm.org/D91338
2024-11-22 18:54:02 +01:00 · 2020-11-30 18:51:23 +00:00 · 2020-11-30 18:51:23 +00:00 · 398e3ba1d1
commit 398e3ba1d1
parent d592ef7991
10 changed files with 59 additions and 40 deletions
--- a/lib/Target/X86/X86CallingConv.cpp
+++ b/lib/Target/X86/X86CallingConv.cpp
@ -330,5 +330,15 @@ static bool CC_X86_Intr(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
  return true;
 }

+static bool CC_X86_64_Pointer(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+                              CCValAssign::LocInfo &LocInfo, // Only LocVT and LocInfo are modified.
+                              ISD::ArgFlagsTy &ArgFlags, CCState &State) { // CCCustom hook applied to pointer values (CCIfPtr).
+  if (LocVT != MVT::i64) { // Pointer location narrower than 64 bits, i.e. ILP32 (x32); LP64 is left untouched.
+    LocVT = MVT::i64; // Place the pointer in a full 64-bit location.
+    LocInfo = CCValAssign::ZExt; // Record that the value is zero-extended into that location.
+  }
+  return false; // NOTE(review): presumably means "not assigned here", so the following .td rules pick the register/stack slot - confirm against CCCustom.
+}
+
 // Provides entry points of CC_X86 and RetCC_X86.
 #include "X86GenCallingConv.inc"
--- a/lib/Target/X86/X86CallingConv.td
+++ b/lib/Target/X86/X86CallingConv.td
@ -336,6 +336,9 @@ def RetCC_X86_64_C : CallingConv<[
  // MMX vector types are always returned in XMM0.
  CCIfType<[x86mmx], CCAssignToReg<[XMM0, XMM1]>>,

+  // Pointers are always returned in full 64-bit registers.
+  CCIfPtr<CCCustom<"CC_X86_64_Pointer">>,
+
  CCIfSwiftError<CCIfType<[i64], CCAssignToReg<[R12]>>>,

  CCDelegateTo<RetCC_X86Common>
@ -518,6 +521,9 @@ def CC_X86_64_C : CallingConv<[
  CCIfCC<"CallingConv::Swift",
    CCIfSRet<CCIfType<[i64], CCAssignToReg<[RAX]>>>>,

+  // Pointers are always passed in full 64-bit registers.
+  CCIfPtr<CCCustom<"CC_X86_64_Pointer">>,
+
  // The first 6 integer arguments are passed in integer registers.
  CCIfType<[i32], CCAssignToReg<[EDI, ESI, EDX, ECX, R8D, R9D]>>,
  CCIfType<[i64], CCAssignToReg<[RDI, RSI, RDX, RCX, R8 , R9 ]>>,
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@ -3067,8 +3067,9 @@ SDValue X86TargetLowering::LowerCallResult(
                        // This truncation won't change the value.
                        DAG.getIntPtrConstant(1, dl));

-    if (VA.isExtInLoc() && (VA.getValVT().getScalarType() == MVT::i1)) {
+    if (VA.isExtInLoc()) {
      if (VA.getValVT().isVector() &&
+          VA.getValVT().getScalarType() == MVT::i1 &&
          ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
           (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
        // promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
--- a/test/CodeGen/X86/musttail-varargs.ll
+++ b/test/CodeGen/X86/musttail-varargs.ll
@ -136,7 +136,7 @@ define void @f_thunk(i8* %this, ...) {
 ; LINUX-X32-NEXT:    movq %rcx, %r13
 ; LINUX-X32-NEXT:    movq %rdx, %rbp
 ; LINUX-X32-NEXT:    movq %rsi, %rbx
-; LINUX-X32-NEXT:    movl %edi, %r14d
+; LINUX-X32-NEXT:    movq %rdi, %r14
 ; LINUX-X32-NEXT:    movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
 ; LINUX-X32-NEXT:    testb %al, %al
 ; LINUX-X32-NEXT:    je .LBB0_2
@ -161,7 +161,7 @@ define void @f_thunk(i8* %this, ...) {
 ; LINUX-X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
 ; LINUX-X32-NEXT:    movabsq $206158430216, %rax # imm = 0x3000000008
 ; LINUX-X32-NEXT:    movq %rax, {{[0-9]+}}(%esp)
-; LINUX-X32-NEXT:    movl %r14d, %edi
+; LINUX-X32-NEXT:    movq %r14, %rdi
 ; LINUX-X32-NEXT:    movaps %xmm7, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
 ; LINUX-X32-NEXT:    movaps %xmm6, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
 ; LINUX-X32-NEXT:    movaps %xmm5, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
@ -172,7 +172,7 @@ define void @f_thunk(i8* %this, ...) {
 ; LINUX-X32-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
 ; LINUX-X32-NEXT:    callq get_f
 ; LINUX-X32-NEXT:    movl %eax, %r11d
-; LINUX-X32-NEXT:    movl %r14d, %edi
+; LINUX-X32-NEXT:    movq %r14, %rdi
 ; LINUX-X32-NEXT:    movq %rbx, %rsi
 ; LINUX-X32-NEXT:    movq %rbp, %rdx
 ; LINUX-X32-NEXT:    movq %r13, %rcx
@ -304,8 +304,7 @@ define void @g_thunk(i8* %fptr_i8, ...) {
 ;
 ; LINUX-X32-LABEL: g_thunk:
 ; LINUX-X32:       # %bb.0:
-; LINUX-X32-NEXT:    movl %edi, %r11d
-; LINUX-X32-NEXT:    jmpq *%r11 # TAILCALL
+; LINUX-X32-NEXT:    jmpq *%rdi # TAILCALL
 ;
 ; WINDOWS-LABEL: g_thunk:
 ; WINDOWS:       # %bb.0:
@ -346,10 +345,12 @@ define void @h_thunk(%struct.Foo* %this, ...) {
 ; LINUX-X32-NEXT:    jne .LBB2_2
 ; LINUX-X32-NEXT:  # %bb.1: # %then
 ; LINUX-X32-NEXT:    movl 4(%edi), %r11d
+; LINUX-X32-NEXT:    movl %edi, %edi
 ; LINUX-X32-NEXT:    jmpq *%r11 # TAILCALL
 ; LINUX-X32-NEXT:  .LBB2_2: # %else
 ; LINUX-X32-NEXT:    movl 8(%edi), %r11d
 ; LINUX-X32-NEXT:    movl $42, {{.*}}(%rip)
+; LINUX-X32-NEXT:    movl %edi, %edi
 ; LINUX-X32-NEXT:    jmpq *%r11 # TAILCALL
 ;
 ; WINDOWS-LABEL: h_thunk:
--- a/test/CodeGen/X86/pr38865-2.ll
+++ b/test/CodeGen/X86/pr38865-2.ll
@ -10,6 +10,7 @@ define void @_Z1bv(%struct.a* noalias sret(%struct.a) %agg.result) {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    pushq %rax
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    # kill: def $edi killed $edi killed $rdi
 ; CHECK-NEXT:    movl %edi, %eax
 ; CHECK-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; CHECK-NEXT:    callq _Z1bv
--- a/test/CodeGen/X86/pr38865-3.ll
+++ b/test/CodeGen/X86/pr38865-3.ll
@ -10,6 +10,7 @@ define void @foo(i8* %x) optsize {
 ; CHECK-NEXT:    movl $707406378, %eax # encoding: [0xb8,0x2a,0x2a,0x2a,0x2a]
 ; CHECK-NEXT:    # imm = 0x2A2A2A2A
 ; CHECK-NEXT:    movl $32, %ecx # encoding: [0xb9,0x20,0x00,0x00,0x00]
+; CHECK-NEXT:    # kill: def $edi killed $edi killed $rdi
 ; CHECK-NEXT:    rep;stosl %eax, %es:(%edi) # encoding: [0xf3,0x67,0xab]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
  call void @llvm.memset.p0i8.i32(i8* align 4 %x, i8 42, i32 128, i1 false)
--- a/test/CodeGen/X86/pr38865.ll
+++ b/test/CodeGen/X86/pr38865.ll
@ -15,7 +15,7 @@ define void @e() nounwind {
 ; CHECK-NEXT:    subl $528, %esp # encoding: [0x81,0xec,0x10,0x02,0x00,0x00]
 ; CHECK-NEXT:    # imm = 0x210
 ; CHECK-NEXT:    leal {{[0-9]+}}(%rsp), %ebx # encoding: [0x8d,0x9c,0x24,0x08,0x01,0x00,0x00]
-; CHECK-NEXT:    movl %ebx, %edi # encoding: [0x89,0xdf]
+; CHECK-NEXT:    movq %rbx, %rdi # encoding: [0x48,0x89,0xdf]
 ; CHECK-NEXT:    movl $c, %esi # encoding: [0xbe,A,A,A,A]
 ; CHECK-NEXT:    # fixup A - offset: 1, value: c, kind: FK_Data_4
 ; CHECK-NEXT:    movl $260, %edx # encoding: [0xba,0x04,0x01,0x00,0x00]
--- a/test/CodeGen/X86/sibcall.ll
+++ b/test/CodeGen/X86/sibcall.ll
@ -74,16 +74,13 @@ define void @t4(void (i32)* nocapture %x) nounwind ssp {
 ;
 ; X32-LABEL: t4:
 ; X32:       # %bb.0:
-; X32-NEXT:    movl %edi, %eax
+; X32-NEXT:    movq %rdi, %rax
 ; X32-NEXT:    xorl %edi, %edi
 ; X32-NEXT:    jmpq *%rax # TAILCALL
  tail call void %x(i32 0) nounwind
  ret void
 }

-; FIXME: This isn't needed since x32 psABI specifies that callers must
-;        zero-extend pointers passed in registers.
-
 define void @t5(void ()* nocapture %x) nounwind ssp {
 ; X86-LABEL: t5:
 ; X86:       # %bb.0:
@ -95,8 +92,7 @@ define void @t5(void ()* nocapture %x) nounwind ssp {
 ;
 ; X32-LABEL: t5:
 ; X32:       # %bb.0:
-; X32-NEXT:    movl %edi, %eax
-; X32-NEXT:    jmpq *%rax # TAILCALL
+; X32-NEXT:    jmpq *%rdi # TAILCALL
  tail call void %x() nounwind
  ret void
 }
@ -227,7 +223,7 @@ define signext i16 @t9(i32 (i32)* nocapture %x) nounwind ssp {
 ;
 ; X32-LABEL: t9:
 ; X32:       # %bb.0: # %entry
-; X32-NEXT:    movl %edi, %eax
+; X32-NEXT:    movq %rdi, %rax
 ; X32-NEXT:    xorl %edi, %edi
 ; X32-NEXT:    jmpq *%rax # TAILCALL
 entry:
@ -400,6 +396,7 @@ define %struct.ns* @t13(%struct.cp* %yy) nounwind ssp {
 ; X32-NEXT:    pushq %rcx
 ; X32-NEXT:    callq foo7
 ; X32-NEXT:    addl $32, %esp
+; X32-NEXT:    movl %eax, %eax
 ; X32-NEXT:    popq %rcx
 ; X32-NEXT:    retq
 entry:
@ -477,7 +474,7 @@ define void @t15(%struct.foo* noalias sret(%struct.foo) %agg.result) nounwind  {
 ; X32-LABEL: t15:
 ; X32:       # %bb.0:
 ; X32-NEXT:    pushq %rbx
-; X32-NEXT:    movl %edi, %ebx
+; X32-NEXT:    movq %rdi, %rbx
 ; X32-NEXT:    callq f
 ; X32-NEXT:    movl %ebx, %eax
 ; X32-NEXT:    popq %rbx
@ -651,7 +648,7 @@ define fastcc void @t21_sret_to_sret(%struct.foo* noalias sret(%struct.foo) %agg
 ; X32-LABEL: t21_sret_to_sret:
 ; X32:       # %bb.0:
 ; X32-NEXT:    pushq %rbx
-; X32-NEXT:    movl %edi, %ebx
+; X32-NEXT:    movq %rdi, %rbx
 ; X32-NEXT:    callq t21_f_sret
 ; X32-NEXT:    movl %ebx, %eax
 ; X32-NEXT:    popq %rbx
@ -689,7 +686,7 @@ define fastcc void @t21_sret_to_sret_alloca(%struct.foo* noalias sret(%struct.fo
 ; X32:       # %bb.0:
 ; X32-NEXT:    pushq %rbx
 ; X32-NEXT:    subl $16, %esp
-; X32-NEXT:    movl %edi, %ebx
+; X32-NEXT:    movq %rdi, %rbx
 ; X32-NEXT:    movl %esp, %edi
 ; X32-NEXT:    callq t21_f_sret
 ; X32-NEXT:    movl %ebx, %eax
@ -727,7 +724,7 @@ define fastcc void @t21_sret_to_sret_more_args(%struct.foo* noalias sret(%struct
 ; X32-LABEL: t21_sret_to_sret_more_args:
 ; X32:       # %bb.0:
 ; X32-NEXT:    pushq %rbx
-; X32-NEXT:    movl %edi, %ebx
+; X32-NEXT:    movq %rdi, %rbx
 ; X32-NEXT:    callq f_sret
 ; X32-NEXT:    movl %ebx, %eax
 ; X32-NEXT:    popq %rbx
@ -762,8 +759,8 @@ define fastcc void @t21_sret_to_sret_second_arg_sret(%struct.foo* noalias %agg.r
 ; X32-LABEL: t21_sret_to_sret_second_arg_sret:
 ; X32:       # %bb.0:
 ; X32-NEXT:    pushq %rbx
-; X32-NEXT:    movl %esi, %ebx
-; X32-NEXT:    movl %esi, %edi
+; X32-NEXT:    movq %rsi, %rbx
+; X32-NEXT:    movq %rsi, %rdi
 ; X32-NEXT:    callq t21_f_sret
 ; X32-NEXT:    movl %ebx, %eax
 ; X32-NEXT:    popq %rbx
@ -803,7 +800,7 @@ define fastcc void @t21_sret_to_sret_more_args2(%struct.foo* noalias sret(%struc
 ; X32:       # %bb.0:
 ; X32-NEXT:    pushq %rbx
 ; X32-NEXT:    movl %esi, %eax
-; X32-NEXT:    movl %edi, %ebx
+; X32-NEXT:    movq %rdi, %rbx
 ; X32-NEXT:    movl %edx, %esi
 ; X32-NEXT:    movl %eax, %edx
 ; X32-NEXT:    callq f_sret
@ -841,8 +838,8 @@ define fastcc void @t21_sret_to_sret_args_mismatch(%struct.foo* noalias sret(%st
 ; X32-LABEL: t21_sret_to_sret_args_mismatch:
 ; X32:       # %bb.0:
 ; X32-NEXT:    pushq %rbx
-; X32-NEXT:    movl %edi, %ebx
-; X32-NEXT:    movl %esi, %edi
+; X32-NEXT:    movq %rdi, %rbx
+; X32-NEXT:    movq %rsi, %rdi
 ; X32-NEXT:    callq t21_f_sret
 ; X32-NEXT:    movl %ebx, %eax
 ; X32-NEXT:    popq %rbx
@ -877,8 +874,8 @@ define fastcc void @t21_sret_to_sret_args_mismatch2(%struct.foo* noalias sret(%s
 ; X32-LABEL: t21_sret_to_sret_args_mismatch2:
 ; X32:       # %bb.0:
 ; X32-NEXT:    pushq %rbx
-; X32-NEXT:    movl %edi, %ebx
-; X32-NEXT:    movl %esi, %edi
+; X32-NEXT:    movq %rdi, %rbx
+; X32-NEXT:    movq %rsi, %rdi
 ; X32-NEXT:    callq t21_f_sret
 ; X32-NEXT:    movl %ebx, %eax
 ; X32-NEXT:    popq %rbx
@ -915,7 +912,7 @@ define fastcc void @t21_sret_to_sret_arg_mismatch(%struct.foo* noalias sret(%str
 ; X32-LABEL: t21_sret_to_sret_arg_mismatch:
 ; X32:       # %bb.0:
 ; X32-NEXT:    pushq %rbx
-; X32-NEXT:    movl %edi, %ebx
+; X32-NEXT:    movq %rdi, %rbx
 ; X32-NEXT:    callq ret_struct
 ; X32-NEXT:    movl %eax, %edi
 ; X32-NEXT:    callq t21_f_sret
@ -964,19 +961,19 @@ define fastcc void @t21_sret_to_sret_structs_mismatch(%struct.foo* noalias sret(
 ;
 ; X32-LABEL: t21_sret_to_sret_structs_mismatch:
 ; X32:       # %bb.0:
-; X32-NEXT:    pushq %rbp
+; X32-NEXT:    pushq %r14
 ; X32-NEXT:    pushq %rbx
 ; X32-NEXT:    pushq %rax
-; X32-NEXT:    movl %esi, %ebx
-; X32-NEXT:    movl %edi, %ebp
+; X32-NEXT:    movq %rsi, %rbx
+; X32-NEXT:    movq %rdi, %r14
 ; X32-NEXT:    callq ret_struct
-; X32-NEXT:    movl %ebx, %edi
 ; X32-NEXT:    movl %eax, %esi
+; X32-NEXT:    movq %rbx, %rdi
 ; X32-NEXT:    callq t21_f_sret2
-; X32-NEXT:    movl %ebp, %eax
+; X32-NEXT:    movl %r14d, %eax
 ; X32-NEXT:    addl $8, %esp
 ; X32-NEXT:    popq %rbx
-; X32-NEXT:    popq %rbp
+; X32-NEXT:    popq %r14
 ; X32-NEXT:    retq
  %b = call fastcc %struct.foo* @ret_struct()
  tail call fastcc void @t21_f_sret2(%struct.foo* noalias sret(%struct.foo) %a, %struct.foo* noalias %b) nounwind
@ -1010,7 +1007,7 @@ define fastcc void @t21_sret_to_non_sret(%struct.foo* noalias sret(%struct.foo)
 ; X32-LABEL: t21_sret_to_non_sret:
 ; X32:       # %bb.0:
 ; X32-NEXT:    pushq %rbx
-; X32-NEXT:    movl %edi, %ebx
+; X32-NEXT:    movq %rdi, %rbx
 ; X32-NEXT:    callq t21_f_non_sret
 ; X32-NEXT:    movl %ebx, %eax
 ; X32-NEXT:    popq %rbx
--- a/test/CodeGen/X86/x32-function_pointer-2.ll
+++ b/test/CodeGen/X86/x32-function_pointer-2.ll
@ -13,9 +13,9 @@
 define void @bar(i8* %h, void (i8*)* nocapture %foo) nounwind {
 entry:
  tail call void %foo(i8* %h) nounwind
-; CHECK: mov{{l|q}}	%{{e|r}}si, %{{e|r}}[[REG:.*]]{{d?}}
-; CHECK: callq	*%r[[REG]]
+; CHECK: mov{{l|q}}	%{{e|r}}si,
+; CHECK: callq	*%r
  tail call void %foo(i8* %h) nounwind
-; CHECK: jmpq	*%r{{[^,]*}}
+; CHECK: jmpq	*%r
  ret void
 }
--- a/test/CodeGen/X86/x86-64-sret-return.ll
+++ b/test/CodeGen/X86/x86-64-sret-return.ll
@ -7,9 +7,10 @@
 ; CHECK-LABEL: bar:
 ; CHECK: movq %rdi, %rax

-; For the x32 ABI, pointers are 32-bit so 32-bit instructions will be used
+; For the x32 ABI, pointers are 32-bit but passed in zero-extended to 64-bit
+; so either 32-bit or 64-bit instructions may be used.
 ; X32ABI-LABEL: bar:
-; X32ABI: movl %edi, %eax
+; X32ABI: mov{{l|q}} %{{r|e}}di, %{{r|e}}ax

 define void @bar(%struct.foo* noalias sret(%struct.foo)  %agg.result, %struct.foo* %d) nounwind  {
 entry:
@ -63,9 +64,10 @@ return:		; preds = %entry
 ; CHECK-LABEL: foo:
 ; CHECK: movq %rdi, %rax

-; For the x32 ABI, pointers are 32-bit so 32-bit instructions will be used
+; For the x32 ABI, pointers are 32-bit but passed in zero-extended to 64-bit
+; so either 32-bit or 64-bit instructions may be used.
 ; X32ABI-LABEL: foo:
-; X32ABI: movl %edi, %eax
+; X32ABI: mov{{l|q}} %{{r|e}}di, %{{r|e}}ax

 define void @foo({ i64 }* noalias nocapture sret({ i64 }) %agg.result) nounwind {
  store { i64 } { i64 0 }, { i64 }* %agg.result