reference, declaration → definition
definition → references, declarations, derived classes, virtual overrides
reference to multiple definitions → definitions
unreferenced
    1
    2
    3
    4
    5
    6
    7
    8
    9
   10
   11
   12
   13
   14
   15
   16
   17
   18
   19
   20
   21
   22
   23
   24
   25
   26
   27
   28
   29
   30
   31
   32
   33
   34
   35
   36
   37
   38
   39
   40
   41
   42
   43
   44
   45
   46
   47
   48
   49
   50
   51
   52
   53
   54
   55
   56
   57
   58
   59
   60
   61
   62
   63
   64
   65
   66
   67
   68
   69
   70
   71
   72
   73
   74
   75
   76
   77
   78
   79
   80
   81
   82
; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon -verify-machineinstrs < %s | FileCheck %s

; An optimization in the DAG Combiner that folds
; (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)
; generates nodes like:
;     v1i32 trunc v1i64, v1i16 trunc v1i64, v1i8 trunc v1i64.
; By default such nodes are scalarized during type legalization, but that
; scalarization causes an assertion failure because v1i64 is a legal type on
; AArch64. The default behaviour is therefore changed from scalarizing to
; widening.

; FIXME: Currently an XTN is generated for v1i32, but it could be optimized
; away, just as no XTN is generated for v1i16 and v1i8.

; Places the single i64 element of %in0 into lane 0 of a <2 x i64> vector
; (lane 1 is undef) and truncates that vector to <2 x i32>.
; Expected output: one XTN narrowing v0.2d into v0.2s.
define <2 x i32> @test_v1i32_0(<1 x i64> %in0) {
; CHECK-LABEL: test_v1i32_0:
; CHECK: xtn v0.2s, v0.2d
  ; Mask <0, undef>: result lane 0 takes element 0 of %in0.
  %1 = shufflevector <1 x i64> %in0, <1 x i64> undef, <2 x i32> <i32 0, i32 undef>
  ; Vector truncation i64 -> i32 on both lanes.
  %2 = trunc <2 x i64> %1 to <2 x i32>
  ret <2 x i32> %2
}

; Same as the lane-0 variant, except the i64 element of %in0 is placed into
; lane 1 of the <2 x i64> vector (lane 0 is undef) before truncating to
; <2 x i32>.
; Expected output: an XTN immediately followed by a DUP that broadcasts
; v0.s[0] across v0.2s.
define <2 x i32> @test_v1i32_1(<1 x i64> %in0) {
; CHECK-LABEL: test_v1i32_1:
; CHECK: xtn v0.2s, v0.2d
; CHECK-NEXT: dup v0.2s, v0.s[0]
  ; Mask <undef, 0>: result lane 1 takes element 0 of %in0.
  %1 = shufflevector <1 x i64> %in0, <1 x i64> undef, <2 x i32> <i32 undef, i32 0>
  ; Vector truncation i64 -> i32 on both lanes.
  %2 = trunc <2 x i64> %1 to <2 x i32>
  ret <2 x i32> %2
}

; Places the single i64 element of %in0 into lane 0 of a <4 x i64> vector
; (remaining lanes undef) and truncates that vector to <4 x i16>.
; Expected output: no XTN instruction is emitted for this function.
define <4 x i16> @test_v1i16_0(<1 x i64> %in0) {
; CHECK-LABEL: test_v1i16_0:
; CHECK-NOT: xtn
  ; Mask <0, undef, undef, undef>: result lane 0 takes element 0 of %in0.
  %1 = shufflevector <1 x i64> %in0, <1 x i64> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  ; Vector truncation i64 -> i16 on all four lanes.
  %2 = trunc <4 x i64> %1 to <4 x i16>
  ret <4 x i16> %2
}

; Places the single i64 element of %in0 into lane 2 of a <4 x i64> vector
; (remaining lanes undef) and truncates that vector to <4 x i16>.
; Expected output: a DUP broadcasting v0.h[0] across v0.4h, with no XTN
; appearing before it.
define <4 x i16> @test_v1i16_1(<1 x i64> %in0) {
; CHECK-LABEL: test_v1i16_1:
; CHECK-NOT: xtn
; CHECK: dup v0.4h, v0.h[0]
  ; Mask <undef, undef, 0, undef>: result lane 2 takes element 0 of %in0.
  %1 = shufflevector <1 x i64> %in0, <1 x i64> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 undef>
  ; Vector truncation i64 -> i16 on all four lanes.
  %2 = trunc <4 x i64> %1 to <4 x i16>
  ret <4 x i16> %2
}

; Places the single i64 element of %in0 into lane 0 of an <8 x i64> vector
; (remaining lanes undef) and truncates that vector to <8 x i8>.
; Expected output: no XTN instruction is emitted for this function.
define <8 x i8> @test_v1i8_0(<1 x i64> %in0) {
; CHECK-LABEL: test_v1i8_0:
; CHECK-NOT: xtn
  ; Mask <0, undef x 7>: result lane 0 takes element 0 of %in0.
  %1 = shufflevector <1 x i64> %in0, <1 x i64> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ; Vector truncation i64 -> i8 on all eight lanes.
  %2 = trunc <8 x i64> %1 to <8 x i8>
  ret <8 x i8> %2
}

; Places the single i64 element of %in0 into lane 2 of an <8 x i64> vector
; (remaining lanes undef) and truncates that vector to <8 x i8>.
; Expected output: a DUP broadcasting v0.b[0] across v0.8b, with no XTN
; appearing before it.
define <8 x i8> @test_v1i8_1(<1 x i64> %in0) {
; CHECK-LABEL: test_v1i8_1:
; CHECK-NOT: xtn
; CHECK: dup v0.8b, v0.b[0]
  ; Mask <undef, undef, 0, undef x 5>: result lane 2 takes element 0 of %in0.
  %1 = shufflevector <1 x i64> %in0, <1 x i64> undef, <8 x i32> <i32 undef, i32 undef, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ; Vector truncation i64 -> i8 on all eight lanes.
  %2 = trunc <8 x i64> %1 to <8 x i8>
  ret <8 x i8> %2
}

; PR20777: v1i1 is also problematic, but we can't widen it, so we extract_elt
; the i64 out of the v1i64 operand, and truncate that scalar instead.

; Truncates a <1 x i64> directly to <1 x i1> and returns the vector.
; Expected output: an FMOV moving s0 into w0.
define <1 x i1> @test_v1i1_0(<1 x i64> %in0) {
; CHECK-LABEL: test_v1i1_0:
; CHECK: fmov w0, s0
  ; Single-element vector truncation i64 -> i1 (no shuffle involved here).
  %1 = trunc <1 x i64> %in0 to <1 x i1>
  ret <1 x i1> %1
}

; Truncates a <1 x i64> to <1 x i1>, then extracts element 0 and returns it
; as a scalar i1.
; Expected output: an FMOV from s0 into some w register, followed by an AND
; of that same register with #0x1 writing w0.
define i1 @test_v1i1_1(<1 x i64> %in0) {
; CHECK-LABEL: test_v1i1_1:
; CHECK: fmov [[REG:w[0-9]+]], s0
  ; Single-element vector truncation i64 -> i1.
  %1 = trunc <1 x i64> %in0 to <1 x i1>
; CHECK: and w0, [[REG]], #0x1
  ; Pull the only lane out of the <1 x i1> so a scalar is returned.
  %2 = extractelement <1 x i1> %1, i32 0
  ret i1 %2
}