reference, declarationdefinition
definition → references, declarations, derived classes, virtual overrides
reference to multiple definitions → definitions
unreferenced
    1
    2
    3
    4
    5
    6
    7
    8
    9
   10
   11
   12
   13
   14
   15
   16
   17
   18
   19
   20
   21
   22
   23
   24
   25
   26
   27
   28
   29
   30
   31
   32
   33
   34
   35
   36
   37
   38
   39
   40
   41
   42
   43
   44
   45
   46
   47
   48
   49
   50
   51
   52
   53
   54
   55
   56
   57
   58
   59
   60
   61
   62
   63
   64
; RUN: llc -march=amdgcn -mcpu=gfx900 -print-after=si-annotate-control-flow %s -o /dev/null 2>&1 | FileCheck %s

; CHECK-LABEL: @switch_unreachable_default

define amdgpu_kernel void @switch_unreachable_default(i32 addrspace(1)* %out, i8 addrspace(1)* %in0, i8 addrspace(1)* %in1) #0 {
centry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  switch i32 %tid, label %sw.default [
    i32 0, label %sw.bb0
    i32 1, label %sw.bb1
  ]

sw.bb0:
  br label %sw.epilog

sw.bb1:
  br label %sw.epilog

sw.default:
  unreachable

sw.epilog:
  %ptr = phi i8 addrspace(1)* [%in0, %sw.bb0], [%in1, %sw.bb1]
  %gep_in = getelementptr inbounds i8, i8 addrspace(1)* %ptr, i64 0
  br label %sw.while

; The loop below is necessary to preserve the effect of the
; unreachable default on divergence analysis in the presence of other
; optimizations. The loop consists of a single block where the loop
; exit is divergent because it depends on the divergent phi at the
; start of the block. The checks below ensure that the loop exit is
; handled correctly as divergent. But the data-flow within the block
; is sensitive to optimizations; so we just ensure that the relevant
; operations in the block body are indeed in the same block.

; CHECK: [[PHI:%[a-zA-Z0-9._]+]]  = phi i64
; CHECK-NOT: {{ br }}
; CHECK: load i8
; CHECK-NOT: {{ br }}
; CHECK: [[ICMP:%[a-zA-Z0-9._]+]] = icmp eq
; CHECK: [[IF:%[a-zA-Z0-9._]+]]   = call i64 @llvm.amdgcn.if.break.i64.i64(i1 [[ICMP]], i64 [[PHI]])
; CHECK: [[LOOP:%[a-zA-Z0-9._]+]] = call i1 @llvm.amdgcn.loop.i64(i64 [[IF]])
; CHECK: br i1 [[LOOP]]

sw.while:
  %p = phi i8 addrspace(1)* [ %gep_in, %sw.epilog ], [ %incdec.ptr, %sw.while ]
  %count = phi i32 [ 0, %sw.epilog ], [ %count.inc, %sw.while ]
  %char = load i8, i8 addrspace(1)* %p, align 1
  %tobool = icmp eq i8 %char, 0
  %incdec.ptr = getelementptr inbounds i8, i8 addrspace(1)* %p, i64 1
  %count.inc = add i32 %count, 1
  br i1 %tobool, label %sw.exit, label %sw.while

sw.exit:
  %tid64 = zext i32 %tid to i64
  %gep_out = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid64
  store i32 %count, i32 addrspace(1)* %gep_out, align 4
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #0

attributes #0 = { nounwind readnone }
attributes #1 = { convergent noinline optnone }