reference, declarationdefinition
definition → references, declarations, derived classes, virtual overrides
reference to multiple definitions → definitions
unreferenced
    1
    2
    3
    4
    5
    6
    7
    8
    9
   10
   11
   12
   13
   14
   15
   16
   17
   18
   19
   20
   21
   22
   23
   24
   25
   26
   27
   28
   29
   30
   31
   32
   33
   34
   35
   36
   37
   38
   39
   40
   41
   42
   43
   44
   45
   46
   47
   48
   49
   50
   51
   52
   53
   54
   55
   56
   57
   58
   59
   60
   61
   62
   63
   64
   65
   66
   67
   68
   69
   70
   71
   72
   73
   74
   75
   76
   77
   78
   79
   80
   81
   82
   83
   84
   85
   86
   87
   88
   89
   90
   91
   92
   93
   94
   95
   96
   97
   98
   99
  100
  101
  102
  103
  104
  105
  106
  107
  108
  109
  110
  111
  112
  113
  114
  115
  116
  117
  118
  119
  120
  121
  122
  123
  124
  125
  126
  127
  128
  129
  130
  131
  132
  133
  134
  135
  136
  137
  138
  139
  140
  141
  142
  143
  144
  145
  146
  147
  148
// RUN: %clang_cc1 -O0 -triple spir -emit-llvm %s -o - | FileCheck %s
// RUN: %clang_cc1 -O0 -triple spir64 -emit-llvm %s -o - | FileCheck %s
// RUN: %clang_cc1 -O0 -triple spir -fexperimental-new-pass-manager -emit-llvm %s -o - | FileCheck %s
// RUN: %clang_cc1 -O0 -triple spir64 -fexperimental-new-pass-manager -emit-llvm %s -o - | FileCheck %s

// This file tests that using the _Float16 type with the spir target will not
// use the llvm intrinsics but instead will use the half arithmetic
// instructions directly.

// Previously attempting to use a constant _Float16 with a comparison
// instruction when the target is spir or spir64 lead to an assert being hit.
bool fcmp_const() {
  _Float16 a = 0.0f16;
  const _Float16 b = 1.0f16;

  // CHECK-NOT: llvm.convert.to.fp16
  // CHECK-NOT: llvm.convert.from.fp16

  // CHECK: [[REG1:%.*]] = load half, half* %a, align 2
  // CHECK-NEXT: fcmp olt half [[REG1]], 0xH3C00

  // CHECK: [[REG2:%.*]] = load half, half* %a, align 2
  // CHECK-NEXT: fcmp olt half [[REG2]], 0xH4000

  // CHECK: [[REG3:%.*]] = load half, half* %a, align 2
  // CHECK-NEXT: fcmp ogt half [[REG3]], 0xH3C00

  // CHECK: [[REG4:%.*]] = load half, half* %a, align 2
  // CHECK-NEXT: fcmp ogt half [[REG4]], 0xH4200

  // CHECK: [[REG5:%.*]] = load half, half* %a, align 2
  // CHECK-NEXT: fcmp oeq half [[REG5]], 0xH3C00

  // CHECK: [[REG7:%.*]] = load half, half* %a, align 2
  // CHECK-NEXT: fcmp oeq half [[REG7]], 0xH4400

  // CHECK: [[REG8:%.*]] = load half, half* %a, align 2
  // CHECK-NEXT: fcmp une half [[REG8]], 0xH3C00

  // CHECK: [[REG9:%.*]] = load half, half* %a, align 2
  // CHECK-NEXT: fcmp une half [[REG9]], 0xH4500

  // CHECK: [[REG10:%.*]] = load half, half* %a, align 2
  // CHECK-NEXT: fcmp ole half [[REG10]], 0xH3C00

  // CHECK: [[REG11:%.*]] = load half, half* %a, align 2
  // CHECK-NEXT: fcmp ole half [[REG11]], 0xH4600

  // CHECK: [[REG12:%.*]] = load half, half* %a, align 2
  // CHECK-NEXT: fcmp oge half [[REG12]], 0xH3C00

  // CHECK: [[REG13:%.*]] = load half, half* %a, align 2
  // CHECK-NEXT: fcmp oge half [[REG13]], 0xH4700
  return a < b || a < 2.0f16 || a > b || a > 3.0f16 || a == b || a == 4.0f16 ||
         a != b || a != 5.0f16 || a <= b || a <= 6.0f16 || a >= b ||
         a >= 7.0f16;
}

bool fcmp() {
  _Float16 a = 0.0f16;
  _Float16 b = 1.0f16;

  // CHECK-NOT: llvm.convert.to.fp16
  // CHECK-NOT: llvm.convert.from.fp16
  // CHECK: [[REG1:%.*]] = load half, half* %a, align 2
  // CHECK-NEXT: [[REG2:%.*]] = load half, half* %b, align 2
  // CHECK-NEXT: fcmp olt half [[REG1]], [[REG2]]

  // CHECK: [[REG3:%.*]] = load half, half* %a, align 2
  // CHECK-NEXT: [[REG4:%.*]] = load half, half* %b, align 2
  // CHECK-NEXT: fcmp ogt half [[REG3]], [[REG4]]

  // CHECK: [[REG5:%.*]] = load half, half* %a, align 2
  // CHECK-NEXT: [[REG6:%.*]] = load half, half* %b, align 2
  // CHECK-NEXT: fcmp oeq half [[REG5]], [[REG6]]

  // CHECK: [[REG7:%.*]] = load half, half* %a, align 2
  // CHECK-NEXT: [[REG8:%.*]] = load half, half* %b, align 2
  // CHECK-NEXT: fcmp une half [[REG7]], [[REG8]]

  // CHECK: [[REG7:%.*]] = load half, half* %a, align 2
  // CHECK-NEXT: [[REG8:%.*]] = load half, half* %b, align 2
  // CHECK-NEXT: fcmp ole half [[REG7]], [[REG8]]

  // CHECK: [[REG7:%.*]] = load half, half* %a, align 2
  // CHECK-NEXT: [[REG8:%.*]] = load half, half* %b, align 2
  // CHECK-NEXT: fcmp oge half [[REG7]], [[REG8]]
  return a < b || a > b || a == b || a != b || a <= b || a >= b;
}

_Float16 fadd() {
  _Float16 a = 1.0f16;
  const _Float16 b = 2.0f16;

  // CHECK-NOT: llvm.convert.to.fp16
  // CHECK-NOT: llvm.convert.from.fp16

  // CHECK: [[REG1:%.*]] = load half, half* %a, align 2
  // CHECK-NEXT: [[REG2:%.*]] = fadd half [[REG1]], 0xH4000
  // CHECK-NEXT: [[REG3:%.*]] = fadd half [[REG2]], 0xH4200
  // CHECK-NEXT: ret half [[REG3]]
  return a + b + 3.0f16;
}

_Float16 fsub() {
  _Float16 a = 1.0f16;
  const _Float16 b = 2.0f16;

  // CHECK-NOT: llvm.convert.to.fp16
  // CHECK-NOT: llvm.convert.from.fp16

  // CHECK: [[REG1:%.*]] = load half, half* %a, align 2
  // CHECK-NEXT: [[REG2:%.*]] = fsub half [[REG1]], 0xH4000
  // CHECK-NEXT: [[REG3:%.*]] = fsub half [[REG2]], 0xH4200
  // CHECK-NEXT: ret half [[REG3]]
  return a - b - 3.0f16;
}

// CHECK: define spir_func half @_Z4fmulDF16_(half %arg)
_Float16 fmul(_Float16 arg) {
  _Float16 a = 1.0f16;
  const _Float16 b = 2.0f16;

  // CHECK-NOT: llvm.convert.to.fp16
  // CHECK-NOT: llvm.convert.from.fp16

  // CHECK: [[REG1:%.*]] = load half, half* %a, align 2
  // CHECK-NEXT: [[REG2:%.*]] = load half, half* %arg.addr, align 2
  // CHECK-NEXT: [[REG3:%.*]] = fmul half [[REG1]], [[REG2]]
  // CHECK-NEXT: [[REG4:%.*]] = fmul half [[REG3]], 0xH4000
  // CHECK-NEXT: [[REG5:%.*]] = fmul half [[REG4]], 0xH4200
  // CHECK-NEXT: ret half [[REG5]]
  return a * arg * b * 3.0f16;
}

_Float16 fdiv() {
  _Float16 a = 1.0f16;
  const _Float16 b = 2.0f16;

  // CHECK-NOT: llvm.convert.to.fp16
  // CHECK-NOT: llvm.convert.from.fp16

  // CHECK: [[REG1:%.*]] = load half, half* %a, align 2
  // CHECK-NEXT: [[REG2:%.*]] = fdiv half [[REG1]], 0xH4000
  // CHECK-NEXT: [[REG3:%.*]] = fdiv half [[REG2]], 0xH4200
  // CHECK-NEXT: ret half [[REG3]]
  return a / b / 3.0f16;
}