reference, declaration → definition
definition → references, declarations, derived classes, virtual overrides
reference to multiple definitions → definitions
unreferenced
    1
    2
    3
    4
    5
    6
    7
    8
    9
   10
   11
   12
   13
   14
   15
   16
   17
   18
   19
   20
   21
   22
   23
   24
   25
   26
   27
   28
   29
   30
   31
   32
   33
   34
   35
   36
   37
   38
   39
   40
   41
   42
   43
   44
   45
   46
   47
   48
   49
   50
   51
   52
   53
   54
   55
   56
   57
   58
   59
   60
   61
   62
   63
   64
   65
   66
   67
   68
   69
   70
   71
   72
   73
   74
   75
   76
   77
   78
   79
   80
   81
   82
   83
   84
   85
   86
   87
   88
   89
   90
   91
   92
   93
   94
   95
   96
   97
   98
   99
  100
  101
  102
  103
  104
  105
  106
  107
  108
  109
  110
  111
  112
  113
  114
  115
  116
  117
  118
  119
  120
  121
  122
  123
  124
  125
  126
  127
  128
  129
  130
  131
  132
  133
  134
  135
  136
  137
  138
  139
  140
  141
  142
  143
  144
  145
  146
  147
  148
  149
  150
  151
  152
  153
  154
  155
  156
# RUN: llc -march=hexagon -run-pass post-RA-sched %s -o - | FileCheck %s

# Test that the Post RA scheduler does not schedule back-to-back loads
# when there is another instruction to schedule. The scheduler avoids
# the back-to-back loads to reduce potential bank conflicts.

# CHECK: = L2_loadrigp
# CHECK: = A2_tfr
# CHECK: = L2_loadrigp

# CHECK: = L4_loadri_rr
# CHECK: = S2_tstbit_i
# CHECK: = L4_loadri_rr

--- |
  ; Aggregate types used by @f0. %s.0 begins with seventeen [24 x i32] fields
  ; (indices 0-16), followed by [3 x i32], [24 x i32], [8 x %s.1] and [5 x i32].
  ; @f0 only addresses fields 12 and 13.
  %s.0 = type { [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [3 x i32], [24 x i32], [8 x %s.1], [5 x i32] }
  ; Pair of i32 values; appears only as the element type of the [8 x %s.1] field.
  %s.1 = type { i32, i32 }

  ; Zero-initialized globals. @f0 stores to @g0 and @g1 and loads from
  ; @g2, @g3 and @g4.
  @g0 = global i64 0
  @g1 = global i64 0
  @g2 = global i32 0
  @g3 = global i32 0
  @g4 = global i8 0

  ; Hexagon intrinsics called from @f0. Each declaration uses attribute group #0.
  declare i32 @llvm.hexagon.S2.cl0(i32) #0
  declare i32 @llvm.hexagon.S2.setbit.r(i32, i32) #0
  declare i64 @llvm.hexagon.M2.vmpy2s.s0(i32, i32) #0
  declare i64 @llvm.hexagon.M2.vmac2s.s0(i64, i32, i32) #0
  declare i64 @llvm.hexagon.A2.vaddws(i64, i64) #0
  declare i32 @llvm.hexagon.A4.modwrapu(i32, i32) #0

  ; IR counterpart of the machine function below. The memory operands in the
  ; machine code refer back to values defined here (%ir.v14, %ir.v17).
  ;
  ; Shape: an outer loop (b1..b5) that runs until the trip counter %v28 equals
  ; %a0, containing an inner loop (b2..b4) that runs until %v13 becomes 0.
  define void @f0(i32 %a0) {
  b0:
    ; Constant address reinterpreted as a pointer to an array of ten %s.0.
    %v0 = bitcast [10 x %s.0]* inttoptr (i32 -121502345 to [10 x %s.0]*) to [10 x %s.0]*
    br label %b1

  b1:                                               ; preds = %b5, %b0
    ; %v1 is the outer trip counter, %v2 the index into the [10 x %s.0] array.
    %v1 = phi i32 [ 0, %b0 ], [ %v28, %b5 ]
    %v2 = phi i32 [ 0, %b0 ], [ %v27, %b5 ]
    ; Two loads from globals; their AND seeds the inner loop's %v11.
    %v3 = load i32, i32* @g2, align 4
    %v4 = load i32, i32* @g3, align 8
    %v5 = and i32 %v4, %v3
    ; Addresses of fields 12 and 13 of element %v2.
    %v6 = getelementptr [10 x %s.0], [10 x %s.0]* %v0, i32 0, i32 %v2
    %v7 = bitcast %s.0* %v6 to %s.0*
    %v8 = getelementptr %s.0, %s.0* %v7, i32 0, i32 12
    %v9 = getelementptr %s.0, %s.0* %v7, i32 0, i32 13
    br label %b2

  b2:                                               ; preds = %b4, %b1
    ; %v10 is the running i64 value carried around the inner loop; %v11 is the
    ; word that S2.setbit.r updates each iteration.
    %v10 = phi i64 [ %v24, %b4 ], [ 0, %b1 ]
    %v11 = phi i32 [ %v13, %b4 ], [ %v5, %b1 ]
    %v12 = tail call i32 @llvm.hexagon.S2.cl0(i32 %v11)
    %v13 = tail call i32 @llvm.hexagon.S2.setbit.r(i32 %v11, i32 %v12)
    ; Two loads using the same index %v12, one from each of fields 12 and 13.
    %v14 = getelementptr [24 x i32], [24 x i32]* %v8, i32 0, i32 %v12
    %v15 = load i32, i32* %v14, align 4
    %v16 = tail call i64 @llvm.hexagon.M2.vmpy2s.s0(i32 %v15, i32 %v15)
    %v17 = getelementptr [24 x i32], [24 x i32]* %v9, i32 0, i32 %v12
    %v18 = load i32, i32* %v17, align 4
    %v19 = tail call i64 @llvm.hexagon.M2.vmac2s.s0(i64 %v16, i32 %v18, i32 %v18)
    ; Bit 0 of @g4 selects whether b3 runs: b3 is taken when the bit is clear.
    %v20 = load i8, i8* @g4, align 1
    %v21 = and i8 %v20, 1
    %v22 = icmp eq i8 %v21, 0
    br i1 %v22, label %b3, label %b4

  b3:                                               ; preds = %b2
    ; Combine %v10 with %v19 via A2.vaddws and publish the result to @g0.
    %v23 = tail call i64 @llvm.hexagon.A2.vaddws(i64 %v10, i64 %v19)
    store i64 %v23, i64* @g0, align 8
    br label %b4

  b4:                                               ; preds = %b3, %b2
    ; Inner-loop latch: leave the inner loop once %v13 is 0.
    %v24 = phi i64 [ %v23, %b3 ], [ %v10, %b2 ]
    %v25 = icmp eq i32 %v13, 0
    br i1 %v25, label %b5, label %b2

  b5:                                               ; preds = %b4
    ; Outer-loop latch: the next array index is modwrapu(%v2 + 1, 10); the trip
    ; counter is incremented and compared against %a0.
    %v26 = add i32 %v2, 1
    %v27 = tail call i32 @llvm.hexagon.A4.modwrapu(i32 %v26, i32 10)
    %v28 = add i32 %v1, 1
    %v29 = icmp eq i32 %v28, %a0
    br i1 %v29, label %b6, label %b1

  b6:                                               ; preds = %b5
    ; Store the last %v19 to @g1 and return.
    store i64 %v19, i64* @g1, align 8
    ret void
  }

  ; Attribute group referenced as #0 by the intrinsic declarations in this module.
  attributes #0 = { nounwind readnone }

...
---
# Machine function fed to the post-RA scheduler. It uses physical registers
# only (the 'registers:' list is empty) and tracks register liveness.
# The input deliberately places two loads next to each other in bb.1 and in
# bb.2; the expected-output patterns at the top of the file require another
# instruction to end up between each pair.
name:            f0
alignment:       16
tracksRegLiveness: true
registers:
liveins:
  - { reg: '$r0', virtual-reg: '' }
fixedStack:
stack:
constants:
body:             |
  ; bb.0: initialise $r3 = 0, $r2 = the constant base address and $r4 = 10.
  ; J2_loop0r takes %bb.1 and $r0 as operands and defines $lc0 and $sa0.
  bb.0:
    successors: %bb.1(0x80000000)
    liveins: $r0:0x00000001

    $r3 = A2_tfrsi 0
    $r2 = A2_tfrsi -121502345
    $r4 = A2_tfrsi 10
    J2_loop0r %bb.1, killed $r0, implicit-def $lc0, implicit-def $sa0, implicit-def $usr

  ; bb.1: the two L2_loadrigp loads (@g2, @g3) are back to back in the input,
  ; immediately followed by an A2_tfr that does not depend on either load.
  ; The first pattern group expects the A2_tfr between the two loads.
  ; NOTE(review): 1824 equals 19*96 + 12 + 64 + 20 and 1152/1248 equal 12*96
  ; and 13*96, so these are presumably sizeof(%s.0) and the byte offsets of
  ; its fields 12 and 13 - confirm against the target data layout.
  bb.1 (address-taken):
    successors: %bb.2(0x80000000)
    liveins: $lc0:0x00000004, $r2:0x00000001, $r3:0x00000001, $r4:0x00000001, $sa0:0x00000004

    $r5 = M2_mpysip $r3, 1824
    $r7 = L2_loadrigp @g2, implicit $gp :: (dereferenceable load 4 from @g2)
    $r8 = L2_loadrigp @g3, implicit killed $gp :: (dereferenceable load 4 from @g3, align 8)
    $r6 = A2_tfr $r5
    $r7 = A2_and killed $r8, killed $r7
    $r5 = M2_accii killed $r5, $r2, 1248
    $r6 = M2_accii killed $r6, $r2, 1152
    $d0 = A2_tfrpi 0

  ; bb.2: self-looping block. The two L4_loadri_rr loads (memory operands
  ; %ir.v14 and %ir.v17) are adjacent in the input; the second pattern group
  ; expects S2_tstbit_i, which consumes the earlier @g4 load in $r12, to be
  ; placed between them.
  bb.2:
    successors: %bb.3(0x04000000), %bb.2(0x7c000000)
    liveins: $lc0:0x00000004, $r0:0x00000001, $r1:0x00000001, $r2:0x00000001, $r3:0x00000001, $r4:0x00000001, $r5:0x00000001, $r6:0x00000001, $r7:0x00000001, $sa0:0x00000004

    $r8 = S2_cl0 $r7
    $r12 = L2_loadrubgp @g4, implicit $gp :: (dereferenceable load 1 from @g4)
    $r7 = S2_setbit_r killed $r7, $r8
    $r9 = L4_loadri_rr $r6, $r8, 2 :: (load 4 from %ir.v14)
    $r13 = L4_loadri_rr $r5, killed $r8, 2 :: (load 4 from %ir.v17)
    $d4 = M2_vmpy2s_s0 killed $r9, $r9, implicit-def dead $usr_ovf
    $p0 = S2_tstbit_i killed $r12, 0
    $d4 = M2_vmac2s_s0 killed $d4, killed $r13, $r13, implicit-def dead $usr_ovf
    $p1 = C2_cmpeqi $r7, 0
    $d6 = A2_vaddws $d0, $d4, implicit-def dead $usr_ovf
    $d0 = A2_tfrpt $p0, killed $d0, implicit $d0
    S4_pstorerdf_abs $p0, @g0, $d6, implicit killed $gp :: (store 8 into @g0)
    $d0 = A2_tfrpf killed $p0, killed $d6, implicit killed $d0
    J2_jumpf killed $p1, %bb.2, implicit-def dead $pc

  ; bb.3: add 1 to $r3, pass it through A4_modwrapu with $r4 (set to 10 in
  ; bb.0), then ENDLOOP0 targets bb.1.
  bb.3:
    successors: %bb.4(0x04000000), %bb.1(0x7c000000)
    liveins: $lc0:0x00000004, $r2:0x00000001, $r3:0x00000001, $r4:0x00000001, $r8:0x00000001, $r9:0x00000001, $sa0:0x00000004

    $r3 = A2_addi killed $r3, 1
    $r3 = A4_modwrapu killed $r3, $r4
    ENDLOOP0 %bb.1, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0

  ; bb.4: store $d4 to @g1 and return through $r31.
  bb.4:
    liveins: $r8:0x00000001, $r9:0x00000001

    S2_storerdgp @g1, killed $d4, implicit killed $gp :: (store 8 into @g1)
    PS_jmpret killed $r31, implicit-def dead $pc
...