reference, declarationdefinition
definition → references, declarations, derived classes, virtual overrides
reference to multiple definitions → definitions
unreferenced
    1
    2
    3
    4
    5
    6
    7
    8
    9
   10
   11
   12
   13
   14
   15
   16
   17
   18
   19
   20
   21
   22
   23
   24
   25
   26
   27
   28
   29
   30
   31
   32
   33
   34
   35
   36
   37
   38
   39
   40
   41
   42
   43
   44
   45
   46
   47
   48
   49
   50
   51
   52
   53
   54
   55
   56
   57
   58
   59
   60
   61
   62
   63
   64
   65
   66
   67
   68
   69
   70
   71
   72
   73
   74
   75
   76
   77
   78
   79
   80
   81
   82
   83
   84
   85
   86
   87
   88
   89
   90
   91
   92
   93
   94
   95
   96
   97
   98
   99
  100
  101
  102
  103
  104
  105
  106
  107
  108
  109
  110
  111
  112
  113
  114
  115
  116
  117
  118
  119
  120
  121
  122
  123
  124
  125
  126
  127
  128
  129
  130
  131
  132
  133
  134
  135
  136
  137
  138
  139
; RUN: opt -loop-reduce -S < %s | FileCheck %s

; Data layout fields: "e" = little-endian; i64 is 64-bit aligned; 16- and
; 32-bit vectors are aligned to their own size; "n16:32:64" declares 16, 32
; and 64 bits as the native integer widths.
target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
; The test is pinned to the 64-bit NVPTX target.
target triple = "nvptx64-unknown-unknown"

; LSR used to be unable to generate a float* induction variable in
; these cases because scalar evolution did not propagate nsw from an
; instruction to its SCEV, which prevented sext from being distributed
; into the corresponding addrec.

; Test this pattern:
;
;   for (int i = 0; i < numIterations; ++i)
;     sum += ptr[i + offset];
;
define float @testadd(float* %input, i32 %offset, i32 %numIterations) {
; Accumulates the floats at %input[%i + %offset] while %i counts up from 0
; in steps of 1, and returns the final sum.
;
; Expected output, as encoded by the directives below: a sext of %offset
; appears before the loop label, the loop carries both a float* phi and an
; i32 phi (in either order), and no sext follows them in this function.
; CHECK-LABEL: @testadd
; CHECK: sext i32 %offset to i64
; CHECK: loop:
; CHECK-DAG: phi float*
; CHECK-DAG: phi i32
; CHECK-NOT: sext

entry:
  br label %loop

loop:
  ; %i is the 32-bit counter; %sum is the running total.
  %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
  %sum = phi float [ %nextsum, %loop ], [ 0.000000e+00, %entry ]
  ; The nsw flag on this add is the property the file header says must
  ; reach the SCEV so that the sext below can be distributed over it.
  %index32 = add nuw nsw i32 %i, %offset
  ; Per-iteration widening of the 32-bit index to the 64-bit GEP index;
  ; this is the sext that must not survive in the output loop.
  %index64 = sext i32 %index32 to i64
  %ptr = getelementptr inbounds float, float* %input, i64 %index64
  %addend = load float, float* %ptr, align 4
  %nextsum = fadd float %sum, %addend
  %nexti = add nuw nsw i32 %i, 1
  ; The exit test runs at the bottom of the loop, on the incremented
  ; counter, so the body executes at least once.
  %exitcond = icmp eq i32 %nexti, %numIterations
  br i1 %exitcond, label %exit, label %loop

exit:
  ret float %nextsum
}

; Test this pattern:
;
;   for (int i = 0; i < numIterations; ++i)
;     sum += ptr[i - offset];
;
define float @testsub(float* %input, i32 %offset, i32 %numIterations) {
; Accumulates the floats at %input[%i - %offset] while %i counts up from 0
; in steps of 1, and returns the final sum.
;
; Expected output, as encoded by the directives below: a sext of %offset
; appears before the loop label, the loop carries both a float* phi and an
; i32 phi (in either order), and no sext follows them in this function.
; CHECK-LABEL: @testsub
; CHECK: sext i32 %offset to i64
; CHECK: loop:
; CHECK-DAG: phi float*
; CHECK-DAG: phi i32
; CHECK-NOT: sext

entry:
  br label %loop

loop:
  ; %i is the 32-bit counter; %sum is the running total.
  %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
  %sum = phi float [ %nextsum, %loop ], [ 0.000000e+00, %entry ]
  ; Subtraction variant of the index computation; the nsw flag is the
  ; property the file header says must reach the SCEV.
  %index32 = sub nuw nsw i32 %i, %offset
  ; Per-iteration widening of the 32-bit index to the 64-bit GEP index;
  ; this is the sext that must not survive in the output loop.
  %index64 = sext i32 %index32 to i64
  %ptr = getelementptr inbounds float, float* %input, i64 %index64
  %addend = load float, float* %ptr, align 4
  %nextsum = fadd float %sum, %addend
  %nexti = add nuw nsw i32 %i, 1
  ; The exit test runs at the bottom of the loop, on the incremented
  ; counter, so the body executes at least once.
  %exitcond = icmp eq i32 %nexti, %numIterations
  br i1 %exitcond, label %exit, label %loop

exit:
  ret float %nextsum
}

; Test this pattern:
;
;   for (int i = 0; i < numIterations; ++i)
;     sum += ptr[i * stride];
;
define float @testmul(float* %input, i32 %stride, i32 %numIterations) {
; Accumulates the floats at %input[%i * %stride] while %i counts up from 0
; in steps of 1, and returns the final sum.
;
; Expected output, as encoded by the directives below: a sext of %stride
; appears before the loop label, the loop carries both a float* phi and an
; i32 phi (in either order), and no sext follows them in this function.
; CHECK-LABEL: @testmul
; CHECK: sext i32 %stride to i64
; CHECK: loop:
; CHECK-DAG: phi float*
; CHECK-DAG: phi i32
; CHECK-NOT: sext

entry:
  br label %loop

loop:
  ; %i is the 32-bit counter; %sum is the running total.
  %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
  %sum = phi float [ %nextsum, %loop ], [ 0.000000e+00, %entry ]
  ; Multiplication variant of the index computation; the nsw flag is the
  ; property the file header says must reach the SCEV.
  %index32 = mul nuw nsw i32 %i, %stride
  ; Per-iteration widening of the 32-bit index to the 64-bit GEP index;
  ; this is the sext that must not survive in the output loop.
  %index64 = sext i32 %index32 to i64
  %ptr = getelementptr inbounds float, float* %input, i64 %index64
  %addend = load float, float* %ptr, align 4
  %nextsum = fadd float %sum, %addend
  %nexti = add nuw nsw i32 %i, 1
  ; The exit test runs at the bottom of the loop, on the incremented
  ; counter, so the body executes at least once.
  %exitcond = icmp eq i32 %nexti, %numIterations
  br i1 %exitcond, label %exit, label %loop

exit:
  ret float %nextsum
}

; Test this pattern:
;
;   for (int i = 0; i < numIterations; ++i)
;     sum += ptr[3 * (i << 7)];
;
; The multiplication by 3 is to make the address calculation expensive
; enough to force the introduction of a pointer induction variable.
define float @testshl(float* %input, i32 %numIterations) {
; Accumulates the floats at %input[3 * (%i << 7)] while %i counts up from 0
; in steps of 1, and returns the final sum.
;
; Expected output, as encoded by the directives below: the loop carries
; both a float* phi and an i32 phi (in either order), and no sext follows
; them. Unlike the other tests in this file, no sext is required before
; the loop label.
; CHECK-LABEL: @testshl
; CHECK: loop:
; CHECK-DAG: phi float*
; CHECK-DAG: phi i32
; CHECK-NOT: sext

entry:
  br label %loop

loop:
  ; %i is the 32-bit counter; %sum is the running total.
  %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
  %sum = phi float [ %nextsum, %loop ], [ 0.000000e+00, %entry ]
  ; Shift variant of the index computation: %i << 7, flagged nuw nsw.
  %index32 = shl nuw nsw i32 %i, 7
  ; The extra multiply by 3 is the cost bump described in the comment
  ; preceding this function.
  %index32mul = mul nuw nsw i32 %index32, 3
  ; Per-iteration widening of the 32-bit index to the 64-bit GEP index;
  ; this is the sext that must not survive in the output loop.
  %index64 = sext i32 %index32mul to i64
  %ptr = getelementptr inbounds float, float* %input, i64 %index64
  %addend = load float, float* %ptr, align 4
  %nextsum = fadd float %sum, %addend
  %nexti = add nuw nsw i32 %i, 1
  ; The exit test runs at the bottom of the loop, on the incremented
  ; counter, so the body executes at least once.
  %exitcond = icmp eq i32 %nexti, %numIterations
  br i1 %exitcond, label %exit, label %loop

exit:
  ret float %nextsum
}