reference, declaration → definition
definition → references, declarations, derived classes, virtual overrides
reference to multiple definitions → definitions
unreferenced
    1
    2
    3
    4
    5
    6
    7
    8
    9
   10
   11
   12
   13
   14
   15
   16
   17
   18
   19
   20
   21
   22
   23
   24
   25
   26
   27
   28
   29
   30
   31
   32
   33
   34
   35
   36
   37
   38
   39
   40
   41
   42
   43
   44
   45
   46
   47
   48
   49
   50
   51
   52
   53
   54
   55
   56
   57
   58
   59
   60
   61
   62
   63
   64
   65
   66
   67
   68
   69
   70
   71
   72
   73
   74
   75
   76
   77
   78
   79
   80
   81
   82
   83
   84
   85
   86
   87
   88
   89
   90
   91
   92
   93
   94
   95
   96
   97
   98
   99
  100
  101
  102
  103
  104
  105
  106
  107
  108
  109
  110
  111
  112
  113
  114
  115
  116
  117
  118
  119
  120
  121
  122
  123
  124
  125
; RUN: llc < %s -mtriple aarch64-unknown-unknown -aarch64-neon-syntax=apple -asm-verbose=false | FileCheck %s
; The command above compiles this file with llc for an AArch64 triple and
; pipes the resulting assembly into FileCheck. The expected-output lines in
; this file use the Apple NEON spelling (e.g. "dup.4h", "mov.h v0[1], w1")
; selected by -aarch64-neon-syntax=apple.

; Data layout string for this module.
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"

; Test the (concat_vectors (bitcast (scalar)), ..) pattern.

; The low 16 bits of %x are viewed as <2 x i8> and that pair is repeated four
; times to fill <8 x i8>. Every 16-bit chunk of the result is the same scalar,
; so the expected lowering is a single 16-bit-lane dup straight from w0.
; CHECK-LABEL: test_concat_scalar_v2i8_to_v8i8_dup:
; CHECK-NEXT: dup.4h v0, w0
; CHECK-NEXT: ret
define <8 x i8> @test_concat_scalar_v2i8_to_v8i8_dup(i32 %x) #0 {
  %lo16 = trunc i32 %x to i16
  %pair = bitcast i16 %lo16 to <2 x i8>
  %splat = shufflevector <2 x i8> %pair, <2 x i8> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ret <8 x i8> %splat
}

; %x is reinterpreted as <4 x i8> and concatenated with itself. Both 32-bit
; halves of the <8 x i8> result hold %x, so a single 32-bit-lane dup from w0
; is expected.
; CHECK-LABEL: test_concat_scalar_v4i8_to_v8i8_dup:
; CHECK-NEXT: dup.2s v0, w0
; CHECK-NEXT: ret
define <8 x i8> @test_concat_scalar_v4i8_to_v8i8_dup(i32 %x) #0 {
  %quad = bitcast i32 %x to <4 x i8>
  %splat = shufflevector <4 x i8> %quad, <4 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <8 x i8> %splat
}

; %x is reinterpreted as <2 x i16> and spread across <8 x i16>. Mask index 2
; refers to the undef second operand, so result lanes 2-3 are unconstrained;
; the other three 32-bit chunks all hold %x. A single 32-bit-lane dup from w0
; is therefore expected.
; CHECK-LABEL: test_concat_scalar_v2i16_to_v8i16_dup:
; CHECK-NEXT: dup.4s v0, w0
; CHECK-NEXT: ret
define <8 x i16> @test_concat_scalar_v2i16_to_v8i16_dup(i32 %x) #0 {
  %halves = bitcast i32 %x to <2 x i16>
  %splat = shufflevector <2 x i16> %halves, <2 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 2, i32 0, i32 1, i32 0, i32 1>
  ret <8 x i16> %splat
}

; Two 16-bit scalars (truncated from %x and %y) are each viewed as <2 x i8>
; and combined into <8 x i8>. In 16-bit chunks the mask selects x, y, undef, y,
; so the expected output seeds the register from w0 and inserts w1 into
; lanes 1 and 3 only; the undef chunk (lane 2) gets no insert.
; CHECK-LABEL: test_concat_scalars_2x_v2i8_to_v8i8:
; CHECK-NEXT: fmov s0, w0
; CHECK-NEXT: mov.h v0[1], w1
; CHECK-NEXT: mov.h v0[3], w1
; CHECK-NEXT: ret
define <8 x i8> @test_concat_scalars_2x_v2i8_to_v8i8(i32 %x, i32 %y) #0 {
  %x16 = trunc i32 %x to i16
  %y16 = trunc i32 %y to i16
  %xpair = bitcast i16 %x16 to <2 x i8>
  %ypair = bitcast i16 %y16 to <2 x i8>
  %joined = shufflevector <2 x i8> %xpair, <2 x i8> %ypair, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 2, i32 3>
  ret <8 x i8> %joined
}

; %x and %y are each reinterpreted as <4 x i8>. The mask takes the second
; operand (%y) for the low half and the first operand (%x) for the high half,
; so the expected output seeds the register from w1 and inserts w0 into
; 32-bit lane 1.
; CHECK-LABEL: test_concat_scalars_2x_v4i8_to_v8i8_dup:
; CHECK-NEXT: fmov s0, w1
; CHECK-NEXT: mov.s v0[1], w0
; CHECK-NEXT: ret
define <8 x i8> @test_concat_scalars_2x_v4i8_to_v8i8_dup(i32 %x, i32 %y) #0 {
  %xquad = bitcast i32 %x to <4 x i8>
  %yquad = bitcast i32 %y to <4 x i8>
  %joined = shufflevector <4 x i8> %xquad, <4 x i8> %yquad, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
  ret <8 x i8> %joined
}

; %x and %y are each reinterpreted as <2 x i16> and arranged, in 32-bit
; chunks, as x, y, y, x. The expected output seeds the register from w0 and
; then inserts w1, w1, w0 into 32-bit lanes 1, 2 and 3.
; CHECK-LABEL: test_concat_scalars_2x_v2i16_to_v8i16_dup:
; CHECK-NEXT: fmov s0, w0
; CHECK-NEXT: mov.s v0[1], w1
; CHECK-NEXT: mov.s v0[2], w1
; CHECK-NEXT: mov.s v0[3], w0
; CHECK-NEXT: ret
define <8 x i16> @test_concat_scalars_2x_v2i16_to_v8i16_dup(i32 %x, i32 %y) #0 {
  %xhalves = bitcast i32 %x to <2 x i16>
  %yhalves = bitcast i32 %y to <2 x i16>
  %joined = shufflevector <2 x i16> %xhalves, <2 x i16> %yhalves, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 0, i32 1>
  ret <8 x i16> %joined
}

; Also make sure we minimize bitcasts.

; This is a pretty artificial testcase: make sure we bitcast to floating-point
; if any of the scalars is floating-point.
; One source scalar is an integer (%x, truncated to 16 bits) and the other is
; a half (%y). In 16-bit chunks the result is x, y, x, y. The expected output
; first moves w0 into a vector register (captured as X), copies it to v0, and
; then fills lanes 1-3 with lane-to-lane moves from v1 and vX.
; NOTE(review): %dummy is unused in the body; presumably it occupies the first
; FP argument register so that %y arrives in v1 - confirm against the ABI.
; CHECK-LABEL: test_concat_scalars_mixed_2x_v2i8_to_v8i8:
; CHECK-NEXT: fmov s[[X:[0-9]+]], w0
; CHECK-NEXT: mov.16b v0, v[[X]]
; CHECK-NEXT: mov.h v0[1], v1[0]
; CHECK-NEXT: mov.h v0[2], v[[X]][0]
; CHECK-NEXT: mov.h v0[3], v1[0]
; CHECK-NEXT: ret
define <8 x i8> @test_concat_scalars_mixed_2x_v2i8_to_v8i8(float %dummy, i32 %x, half %y) #0 {
  %x16 = trunc i32 %x to i16
  %xpair = bitcast i16 %x16 to <2 x i8>
  %ypair = bitcast half %y to <2 x i8>
  %joined = shufflevector <2 x i8> %xpair, <2 x i8> %ypair, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <8 x i8> %joined
}

; Both source scalars are halves. Each is viewed as <2 x i8>, the pairs are
; interleaved as x, y, x, y (in 16-bit chunks), and the <8 x i8> result is
; returned reinterpreted as <2 x float>. The expected output contains only
; vector-register moves: v1 is copied to v0 and lanes 1-3 are filled from
; v2 and v1.
; NOTE(review): %dummy is unused in the body; presumably it occupies the first
; FP argument register so that %x and %y arrive in v1 and v2 - confirm.
; CHECK-LABEL: test_concat_scalars_fp_2x_v2i8_to_v8i8:
; CHECK-NEXT: mov.16b v0, v1
; CHECK-NEXT: mov.h v0[1], v2[0]
; CHECK-NEXT: mov.h v0[2], v1[0]
; CHECK-NEXT: mov.h v0[3], v2[0]
; CHECK-NEXT: ret
define <2 x float> @test_concat_scalars_fp_2x_v2i8_to_v8i8(float %dummy, half %x, half %y) #0 {
  %xpair = bitcast half %x to <2 x i8>
  %ypair = bitcast half %y to <2 x i8>
  %joined = shufflevector <2 x i8> %xpair, <2 x i8> %ypair, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  %asfp = bitcast <8 x i8> %joined to <2 x float>
  ret <2 x float> %asfp
}

; A float is reinterpreted as <2 x i16>, spread across <8 x i16>, and the
; result is returned as <4 x float>. Mask index 2 refers to the undef second
; operand, so lanes 2-3 are unconstrained; all other 32-bit chunks hold %x.
; Because the scalar already arrives in a vector register, the expected
; lowering is a lane dup from v0[0] rather than from a general register.
; CHECK-LABEL: test_concat_scalar_fp_v2i16_to_v16i8_dup:
; CHECK-NEXT: dup.4s v0, v0[0]
; CHECK-NEXT: ret
define <4 x float> @test_concat_scalar_fp_v2i16_to_v16i8_dup(float %x) #0 {
  %halves = bitcast float %x to <2 x i16>
  %splat = shufflevector <2 x i16> %halves, <2 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 2, i32 0, i32 1, i32 0, i32 1>
  %asfp = bitcast <8 x i16> %splat to <4 x float>
  ret <4 x float> %asfp
}

; Attribute group #0, referenced by each function defined above: nounwind.
attributes #0 = { nounwind }