; Test for non-trivial loop unswitching on a three-deep loop nest guarded by
; five loop-invariant conditions.
;
; Here all the branches we unswitch are exiting from the inner loop.
; That means we should not be getting exponential behavior on inner-loop
; unswitch. In fact there should be just a single version of inner-loop,
; with possibly some outer loop copies.
;
; There should be just a single copy of each loop when the strictest multiplier
; candidates formula (unscaled candidates == 0) is enforced:

; RUN: opt < %s -enable-unswitch-cost-multiplier=true \
; RUN:     -unswitch-num-initial-unscaled-candidates=0 -unswitch-siblings-toplevel-div=1 \
; RUN:     -passes='loop(unswitch<nontrivial>),print<loops>' -disable-output 2>&1 | FileCheck %s --check-prefixes=LOOP1
;
; RUN: opt < %s -enable-unswitch-cost-multiplier=true \
; RUN:     -unswitch-num-initial-unscaled-candidates=0 -unswitch-siblings-toplevel-div=16 \
; RUN:     -passes='loop(unswitch<nontrivial>),print<loops>' -disable-output 2>&1 | FileCheck %s --check-prefixes=LOOP1
;
; RUN: opt < %s -enable-unswitch-cost-multiplier=true \
; RUN:     -unswitch-num-initial-unscaled-candidates=0 -unswitch-siblings-toplevel-div=1 \
; RUN:     -passes='loop-mssa(unswitch<nontrivial>),print<loops>' -disable-output 2>&1 | FileCheck %s --check-prefixes=LOOP1
;
; RUN: opt < %s -enable-unswitch-cost-multiplier=true \
; RUN:     -unswitch-num-initial-unscaled-candidates=0 -unswitch-siblings-toplevel-div=16 \
; RUN:     -passes='loop-mssa(unswitch<nontrivial>),print<loops>' -disable-output 2>&1 | FileCheck %s --check-prefixes=LOOP1
;
; When we relax the candidates part of a multiplier formula
; (unscaled candidates == 3) we start getting some unswitches in outer loops,
; which leads to siblings multiplier kicking in.
;
; RUN: opt < %s -enable-unswitch-cost-multiplier=true \
; RUN:     -unswitch-num-initial-unscaled-candidates=3 -unswitch-siblings-toplevel-div=1 \
; RUN:     -passes='loop(unswitch<nontrivial>),print<loops>' -disable-output 2>&1 | \
; RUN:     sort -b -k 1 | FileCheck %s --check-prefixes=LOOP-UNSCALE3-DIV1
;
; RUN: opt < %s -enable-unswitch-cost-multiplier=true \
; RUN:     -unswitch-num-initial-unscaled-candidates=3 -unswitch-siblings-toplevel-div=1 \
; RUN:     -passes='loop-mssa(unswitch<nontrivial>),print<loops>' -disable-output 2>&1 | \
; RUN:     sort -b -k 1 | FileCheck %s --check-prefixes=LOOP-UNSCALE3-DIV1
;
; NB: sort -b is essential here and below, otherwise blanks might lead to different
; order depending on locale.
;
; RUN: opt < %s -enable-unswitch-cost-multiplier=true \
; RUN:     -unswitch-num-initial-unscaled-candidates=3 -unswitch-siblings-toplevel-div=2 \
; RUN:     -passes='loop(unswitch<nontrivial>),print<loops>' -disable-output 2>&1 | \
; RUN:     sort -b -k 1 | FileCheck %s --check-prefixes=LOOP-UNSCALE3-DIV2
;
; RUN: opt < %s -enable-unswitch-cost-multiplier=true \
; RUN:     -unswitch-num-initial-unscaled-candidates=3 -unswitch-siblings-toplevel-div=2 \
; RUN:     -passes='loop-mssa(unswitch<nontrivial>),print<loops>' -disable-output 2>&1 | \
; RUN:     sort -b -k 1 | FileCheck %s --check-prefixes=LOOP-UNSCALE3-DIV2
;
; With the cost multiplier disabled we get the maximal possible number of unswitches.
;
; RUN: opt < %s -enable-unswitch-cost-multiplier=false \
; RUN:     -passes='loop(unswitch<nontrivial>),print<loops>' -disable-output 2>&1 | \
; RUN:	   sort -b -k 1 | FileCheck %s --check-prefixes=LOOP-MAX
;
; RUN: opt < %s -enable-unswitch-cost-multiplier=false \
; RUN:     -passes='loop-mssa(unswitch<nontrivial>),print<loops>' -disable-output 2>&1 | \
; RUN:	   sort -b -k 1 | FileCheck %s --check-prefixes=LOOP-MAX
;
; Single loop nest, not unswitched
; LOOP1:     Loop at depth 1 containing:
; LOOP1-NOT:  Loop at depth 1 containing:
; LOOP1:     Loop at depth 2 containing:
; LOOP1-NOT:  Loop at depth 2 containing:
; LOOP1:     Loop at depth 3 containing:
; LOOP1-NOT:  Loop at depth 3 containing:
;
; Half unswitched loop nests, with unscaled3 and div1 it gets fewer depth1 loops unswitched
; since they have more cost.
; LOOP-UNSCALE3-DIV1-COUNT-4: Loop at depth 1 containing:
; LOOP-UNSCALE3-DIV1-NOT:      Loop at depth 1 containing:
; LOOP-UNSCALE3-DIV1-COUNT-1: Loop at depth 2 containing:
; LOOP-UNSCALE3-DIV1-NOT:      Loop at depth 2 containing:
; LOOP-UNSCALE3-DIV1-COUNT-1: Loop at depth 3 containing:
; LOOP-UNSCALE3-DIV1-NOT:      Loop at depth 3 containing:
;
; Half unswitched loop nests, with unscaled3 and div2 it gets more depth1 loops unswitched
; as div2 kicks in.
; LOOP-UNSCALE3-DIV2-COUNT-6: Loop at depth 1 containing:
; LOOP-UNSCALE3-DIV2-NOT:      Loop at depth 1 containing:
; LOOP-UNSCALE3-DIV2-COUNT-1: Loop at depth 2 containing:
; LOOP-UNSCALE3-DIV2-NOT:      Loop at depth 2 containing:
; LOOP-UNSCALE3-DIV2-COUNT-1: Loop at depth 3 containing:
; LOOP-UNSCALE3-DIV2-NOT:      Loop at depth 3 containing:
;
; Maximally unswitched (copy of the outer loop per each condition)
; LOOP-MAX-COUNT-6: Loop at depth 1 containing:
; LOOP-MAX-NOT:      Loop at depth 1 containing:
; LOOP-MAX-COUNT-1: Loop at depth 2 containing:
; LOOP-MAX-NOT:      Loop at depth 2 containing:
; LOOP-MAX-COUNT-1: Loop at depth 3 containing:
; LOOP-MAX-NOT:      Loop at depth 3 containing:

; External function with no visible body; the test function below calls it once
; in the header of each of its three loops.
declare void @bar()

; Three-deep loop nest (outer -> middle -> loop) whose innermost body tests the
; five i1 arguments %c1..%c5 one after another. None of these conditions is
; recomputed inside the function, and the false edge of every one of them goes
; straight to %outer_latch, i.e. it leaves both the innermost and the middle
; loop. %addr is only used to form the three pointers stored to in the latches.
define void @loop_nested3_conds5(i32* %addr, i1 %c1, i1 %c2, i1 %c3, i1 %c4, i1 %c5) {
entry:
  ; Pointers to elements 0, 1 and 2 of %addr; they are written by the inner,
  ; middle and outer latch respectively.
  %addr1 = getelementptr i32, i32* %addr, i64 0
  %addr2 = getelementptr i32, i32* %addr, i64 1
  %addr3 = getelementptr i32, i32* %addr, i64 2
  br label %outer
; Header of the outermost loop; %iv1 starts at 0 and is incremented by 1.
outer:
  %iv1 = phi i32 [0, %entry], [%iv1.next, %outer_latch]
  %iv1.next = add i32 %iv1, 1
  ;; skip nontrivial unswitch
  call void @bar()
  br label %middle
; Header of the middle loop; %iv2 restarts at 0 on every entry from %outer.
middle:
  %iv2 = phi i32 [0, %outer], [%iv2.next, %middle_latch]
  %iv2.next = add i32 %iv2, 1
  ;; skip nontrivial unswitch
  call void @bar()
  br label %loop
; Header of the innermost loop; %iv3 restarts at 0 on every entry from %middle.
loop:
  %iv3 = phi i32 [0, %middle], [%iv3.next, %loop_latch]
  %iv3.next = add i32 %iv3, 1
  ;; skip nontrivial unswitch
  call void @bar()
  ; First invariant condition: true stays in the inner loop, false jumps to
  ; the outer latch.
  br i1 %c1, label %loop_next1_left, label %outer_latch
loop_next1_left:
  br label %loop_next1
; No branch in this function targets this block (the same holds for the other
; *_right blocks below).
loop_next1_right:
  br label %loop_next1

; Second invariant condition, same shape as the first.
loop_next1:
  br i1 %c2, label %loop_next2_left, label %outer_latch
loop_next2_left:
  br label %loop_next2
loop_next2_right:
  br label %loop_next2

; Third invariant condition.
loop_next2:
  br i1 %c3, label %loop_next3_left, label %outer_latch
loop_next3_left:
  br label %loop_next3
loop_next3_right:
  br label %loop_next3

; Fourth invariant condition.
loop_next3:
  br i1 %c4, label %loop_next4_left, label %outer_latch
loop_next4_left:
  br label %loop_next4
loop_next4_right:
  br label %loop_next4

; Fifth and last invariant condition; its true edge leads on to the inner latch.
loop_next4:
  br i1 %c5, label %loop_latch_left, label %outer_latch
loop_latch_left:
  br label %loop_latch
loop_latch_right:
  br label %loop_latch

; Inner latch: volatile store to element 0, then take the back edge while %iv3
; (the value before the increment) is below 50.
loop_latch:
  store volatile i32 0, i32* %addr1
  %test_loop = icmp slt i32 %iv3, 50
  br i1 %test_loop, label %loop, label %middle_latch
; Middle latch: volatile store to element 1, back edge while %iv2 is below 50.
middle_latch:
  store volatile i32 0, i32* %addr2
  %test_middle = icmp slt i32 %iv2, 50
  br i1 %test_middle, label %middle, label %outer_latch
; Outer latch: reached from the middle latch and from the false edge of each of
; the five conditions. Volatile store to element 2, back edge while %iv1 is
; below 50, otherwise leave the nest.
outer_latch:
  store volatile i32 0, i32* %addr3
  %test_outer = icmp slt i32 %iv1, 50
  br i1 %test_outer, label %outer, label %exit
exit:
  ret void
}