reference, declarationdefinition
definition → references, declarations, derived classes, virtual overrides
reference to multiple definitions → definitions
unreferenced
    1
    2
    3
    4
    5
    6
    7
    8
    9
   10
   11
   12
   13
   14
   15
   16
   17
   18
   19
   20
   21
   22
   23
   24
   25
   26
   27
   28
   29
   30
   31
   32
   33
   34
   35
   36
   37
   38
   39
   40
   41
   42
   43
   44
   45
   46
   47
   48
   49
   50
   51
   52
   53
   54
   55
   56
   57
   58
   59
   60
   61
   62
   63
   64
   65
   66
   67
; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -amdgpu-promote-alloca < %s | FileCheck %s
; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -amdgpu-promote-alloca -disable-promote-alloca-to-lds< %s | FileCheck -check-prefix=NOLDS %s

; This normally would be fixed by instcombine to be compare to the GEP
; indices

; NOLDS-NOT: addrspace(3)

; CHECK-LABEL: @lds_promoted_alloca_icmp_same_derived_pointer(
; CHECK: [[ARRAYGEP:%[0-9]+]] = getelementptr inbounds [256 x [16 x i32]], [256 x [16 x i32]] addrspace(3)* @lds_promoted_alloca_icmp_same_derived_pointer.alloca, i32 0, i32 %{{[0-9]+}}
; CHECK: %ptr0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* [[ARRAYGEP]], i32 0, i32 %a
; CHECK: %ptr1 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* [[ARRAYGEP]], i32 0, i32 %b
; CHECK: %cmp = icmp eq i32 addrspace(3)* %ptr0, %ptr1
define amdgpu_kernel void @lds_promoted_alloca_icmp_same_derived_pointer(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
  %alloca = alloca [16 x i32], align 4
  %ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
  %ptr1 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %b
  %cmp = icmp eq i32* %ptr0, %ptr1
  %zext = zext i1 %cmp to i32
  store volatile i32 %zext, i32 addrspace(1)* %out
  ret void
}

; CHECK-LABEL: @lds_promoted_alloca_icmp_null_rhs(
; CHECK: [[ARRAYGEP:%[0-9]+]] = getelementptr inbounds [256 x [16 x i32]], [256 x [16 x i32]] addrspace(3)* @lds_promoted_alloca_icmp_null_rhs.alloca, i32 0, i32 %{{[0-9]+}}
; CHECK: %ptr0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* [[ARRAYGEP]], i32 0, i32 %a
; CHECK: %cmp = icmp eq i32 addrspace(3)* %ptr0, null
define amdgpu_kernel void @lds_promoted_alloca_icmp_null_rhs(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
  %alloca = alloca [16 x i32], align 4
  %ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
  %cmp = icmp eq i32* %ptr0, null
  %zext = zext i1 %cmp to i32
  store volatile i32 %zext, i32 addrspace(1)* %out
  ret void
}

; CHECK-LABEL: @lds_promoted_alloca_icmp_null_lhs(
; CHECK: [[ARRAYGEP:%[0-9]+]] = getelementptr inbounds [256 x [16 x i32]], [256 x [16 x i32]] addrspace(3)* @lds_promoted_alloca_icmp_null_lhs.alloca, i32 0, i32 %{{[0-9]+}}
; CHECK: %ptr0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* [[ARRAYGEP]], i32 0, i32 %a
; CHECK: %cmp = icmp eq i32 addrspace(3)* null, %ptr0
define amdgpu_kernel void @lds_promoted_alloca_icmp_null_lhs(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
  %alloca = alloca [16 x i32], align 4
  %ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
  %cmp = icmp eq i32* null, %ptr0
  %zext = zext i1 %cmp to i32
  store volatile i32 %zext, i32 addrspace(1)* %out
  ret void
}

; CHECK-LABEL: @lds_promoted_alloca_icmp_unknown_ptr(
; CHECK: %alloca = alloca [16 x i32], align 4
; CHECK: %ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
; CHECK: %ptr1 = call i32* @get_unknown_pointer()
; CHECK: %cmp = icmp eq i32* %ptr0, %ptr1
define amdgpu_kernel void @lds_promoted_alloca_icmp_unknown_ptr(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
  %alloca = alloca [16 x i32], align 4
  %ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
  %ptr1 = call i32* @get_unknown_pointer()
  %cmp = icmp eq i32* %ptr0, %ptr1
  %zext = zext i1 %cmp to i32
  store volatile i32 %zext, i32 addrspace(1)* %out
  ret void
}

declare i32* @get_unknown_pointer() #0

attributes #0 = { nounwind "amdgpu-waves-per-eu"="1,1" }