| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py |
| ; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=sse2 --data-layout="e" | FileCheck %s --check-prefixes=CHECK |
| ; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=avx2 --data-layout="e" | FileCheck %s --check-prefixes=CHECK |
| ; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=sse2 --data-layout="E" | FileCheck %s --check-prefixes=CHECK |
| ; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=avx2 --data-layout="E" | FileCheck %s --check-prefixes=CHECK |
| |
| ;------------------------------------------------------------------------------- |
| ; Here we know we can load 128 bits as per dereferenceability and alignment. |
| |
| ; We don't widen scalar loads per se: a plain (non-vector) float load must be |
| ; left untouched even though the pointer is 16-byte aligned and 16 bytes are |
| ; known dereferenceable. (This used to load <1 x float>, which merely duplicated |
| ; the single-element-vector test and left the scalar case uncovered.) |
| define float @scalar(float* align 16 dereferenceable(16) %p) { |
| ; CHECK-LABEL: @scalar( |
| ; CHECK-NEXT: [[R:%.*]] = load float, float* [[P:%.*]], align 16 |
| ; CHECK-NEXT: ret float [[R]] |
| ; |
| %r = load float, float* %p, align 16 |
| ret float %r |
| } |
| |
| ; We don't widen single-element vector loads; these get scalarized. |
| ; The pointer is 16-byte aligned with 16 bytes known dereferenceable, yet the |
| ; CHECK lines expect the <1 x float> load to come out unchanged. |
| define <1 x float> @vec_with_1elt(<1 x float>* align 16 dereferenceable(16) %p) { |
| ; CHECK-LABEL: @vec_with_1elt( |
| ; CHECK-NEXT: [[R:%.*]] = load <1 x float>, <1 x float>* [[P:%.*]], align 16 |
| ; CHECK-NEXT: ret <1 x float> [[R]] |
| ; |
| %r = load <1 x float>, <1 x float>* %p, align 16 |
| ret <1 x float> %r |
| } |
| |
| ; 64-bit (<2 x float>) load from a pointer that is 16-byte aligned with 16 bytes |
| ; known dereferenceable. The CHECK lines expect the load to be left as-is. |
| define <2 x float> @vec_with_2elts(<2 x float>* align 16 dereferenceable(16) %p) { |
| ; CHECK-LABEL: @vec_with_2elts( |
| ; CHECK-NEXT: [[R:%.*]] = load <2 x float>, <2 x float>* [[P:%.*]], align 16 |
| ; CHECK-NEXT: ret <2 x float> [[R]] |
| ; |
| %r = load <2 x float>, <2 x float>* %p, align 16 |
| ret <2 x float> %r |
| } |
| |
| ; 96-bit (<3 x float>) load from a pointer that is 16-byte aligned with 16 bytes |
| ; known dereferenceable. The CHECK lines expect the load to be left as-is. |
| define <3 x float> @vec_with_3elts(<3 x float>* align 16 dereferenceable(16) %p) { |
| ; CHECK-LABEL: @vec_with_3elts( |
| ; CHECK-NEXT: [[R:%.*]] = load <3 x float>, <3 x float>* [[P:%.*]], align 16 |
| ; CHECK-NEXT: ret <3 x float> [[R]] |
| ; |
| %r = load <3 x float>, <3 x float>* %p, align 16 |
| ret <3 x float> %r |
| } |
| |
| ; Full-vector load. All good already. |
| ; <4 x float> is exactly 128 bits, matching the 16 dereferenceable bytes, so |
| ; there is nothing to widen; the CHECK lines expect the load unchanged. |
| define <4 x float> @vec_with_4elts(<4 x float>* align 16 dereferenceable(16) %p) { |
| ; CHECK-LABEL: @vec_with_4elts( |
| ; CHECK-NEXT: [[R:%.*]] = load <4 x float>, <4 x float>* [[P:%.*]], align 16 |
| ; CHECK-NEXT: ret <4 x float> [[R]] |
| ; |
| %r = load <4 x float>, <4 x float>* %p, align 16 |
| ret <4 x float> %r |
| } |
| |
| ; We don't know we can load 256 bits though. |
| ; The <5 x float> load reads 20 bytes, but the parameter attributes only |
| ; guarantee 16 dereferenceable bytes. The CHECK lines expect the load unchanged. |
| define <5 x float> @vec_with_5elts(<5 x float>* align 16 dereferenceable(16) %p) { |
| ; CHECK-LABEL: @vec_with_5elts( |
| ; CHECK-NEXT: [[R:%.*]] = load <5 x float>, <5 x float>* [[P:%.*]], align 16 |
| ; CHECK-NEXT: ret <5 x float> [[R]] |
| ; |
| %r = load <5 x float>, <5 x float>* %p, align 16 |
| ret <5 x float> %r |
| } |
| |
| ;------------------------------------------------------------------------------- |
| |
| ; We can load 128 bits, and the fact that it's underaligned isn't relevant. |
| ; Here the pointer is only 8-byte aligned but 16 bytes are known dereferenceable. |
| ; The CHECK lines expect the <3 x float> load, with align 8, to be unchanged. |
| define <3 x float> @vec_with_3elts_underaligned(<3 x float>* align 8 dereferenceable(16) %p) { |
| ; CHECK-LABEL: @vec_with_3elts_underaligned( |
| ; CHECK-NEXT: [[R:%.*]] = load <3 x float>, <3 x float>* [[P:%.*]], align 8 |
| ; CHECK-NEXT: ret <3 x float> [[R]] |
| ; |
| %r = load <3 x float>, <3 x float>* %p, align 8 |
| ret <3 x float> %r |
| } |
| |
| ; We don't know we can load 128 bits (only 12 bytes are known dereferenceable), |
| ; but since the pointer is 16-byte aligned, we can still do a wide load. |
| ; FIXME: this should still get widened. |
| ; The CHECK lines record the current output, where the load is left as-is. |
| define <3 x float> @vec_with_3elts_underdereferenceable(<3 x float>* align 16 dereferenceable(12) %p) { |
| ; CHECK-LABEL: @vec_with_3elts_underdereferenceable( |
| ; CHECK-NEXT: [[R:%.*]] = load <3 x float>, <3 x float>* [[P:%.*]], align 16 |
| ; CHECK-NEXT: ret <3 x float> [[R]] |
| ; |
| %r = load <3 x float>, <3 x float>* %p, align 16 |
| ret <3 x float> %r |
| } |
| |
| ; We can't tell if we can load 128 bits. |
| ; The pointer is only 8-byte aligned and only 12 bytes are known dereferenceable, |
| ; so neither attribute covers a full 16-byte access. The CHECK lines expect the |
| ; load unchanged. |
| define <3 x float> @vec_with_3elts_underaligned_underdereferenceable(<3 x float>* align 8 dereferenceable(12) %p) { |
| ; CHECK-LABEL: @vec_with_3elts_underaligned_underdereferenceable( |
| ; CHECK-NEXT: [[R:%.*]] = load <3 x float>, <3 x float>* [[P:%.*]], align 8 |
| ; CHECK-NEXT: ret <3 x float> [[R]] |
| ; |
| %r = load <3 x float>, <3 x float>* %p, align 8 |
| ret <3 x float> %r |
| } |
| |
| ;------------------------------------------------------------------------------- |
| ; Here we know we can load 256 bits as per dereferenceability and alignment. |
| |
| ; Single-element (<1 x float>, 32-bit) load from a pointer that is 32-byte |
| ; aligned with 32 bytes known dereferenceable. The CHECK lines expect the load |
| ; to be left as-is. |
| define <1 x float> @vec_with_1elt_256bits(<1 x float>* align 32 dereferenceable(32) %p) { |
| ; CHECK-LABEL: @vec_with_1elt_256bits( |
| ; CHECK-NEXT: [[R:%.*]] = load <1 x float>, <1 x float>* [[P:%.*]], align 32 |
| ; CHECK-NEXT: ret <1 x float> [[R]] |
| ; |
| %r = load <1 x float>, <1 x float>* %p, align 32 |
| ret <1 x float> %r |
| } |
| |
| ; 64-bit (<2 x float>) load from a pointer that is 32-byte aligned with 32 bytes |
| ; known dereferenceable. The CHECK lines expect the load to be left as-is. |
| define <2 x float> @vec_with_2elts_256bits(<2 x float>* align 32 dereferenceable(32) %p) { |
| ; CHECK-LABEL: @vec_with_2elts_256bits( |
| ; CHECK-NEXT: [[R:%.*]] = load <2 x float>, <2 x float>* [[P:%.*]], align 32 |
| ; CHECK-NEXT: ret <2 x float> [[R]] |
| ; |
| %r = load <2 x float>, <2 x float>* %p, align 32 |
| ret <2 x float> %r |
| } |
| |
| ; 96-bit (<3 x float>) load from a pointer that is 32-byte aligned with 32 bytes |
| ; known dereferenceable. The CHECK lines expect the load to be left as-is. |
| define <3 x float> @vec_with_3elts_256bits(<3 x float>* align 32 dereferenceable(32) %p) { |
| ; CHECK-LABEL: @vec_with_3elts_256bits( |
| ; CHECK-NEXT: [[R:%.*]] = load <3 x float>, <3 x float>* [[P:%.*]], align 32 |
| ; CHECK-NEXT: ret <3 x float> [[R]] |
| ; |
| %r = load <3 x float>, <3 x float>* %p, align 32 |
| ret <3 x float> %r |
| } |
| |
| ; 128-bit (<4 x float>) load from a pointer that is 32-byte aligned with 32 bytes |
| ; known dereferenceable. The CHECK lines expect the load to be left as-is. |
| define <4 x float> @vec_with_4elts_256bits(<4 x float>* align 32 dereferenceable(32) %p) { |
| ; CHECK-LABEL: @vec_with_4elts_256bits( |
| ; CHECK-NEXT: [[R:%.*]] = load <4 x float>, <4 x float>* [[P:%.*]], align 32 |
| ; CHECK-NEXT: ret <4 x float> [[R]] |
| ; |
| %r = load <4 x float>, <4 x float>* %p, align 32 |
| ret <4 x float> %r |
| } |
| |
| ; 160-bit (<5 x float>) load from a pointer that is 32-byte aligned with 32 bytes |
| ; known dereferenceable. The CHECK lines expect the load to be left as-is. |
| define <5 x float> @vec_with_5elts_256bits(<5 x float>* align 32 dereferenceable(32) %p) { |
| ; CHECK-LABEL: @vec_with_5elts_256bits( |
| ; CHECK-NEXT: [[R:%.*]] = load <5 x float>, <5 x float>* [[P:%.*]], align 32 |
| ; CHECK-NEXT: ret <5 x float> [[R]] |
| ; |
| %r = load <5 x float>, <5 x float>* %p, align 32 |
| ret <5 x float> %r |
| } |
| |
| ; 192-bit (<6 x float>) load from a pointer that is 32-byte aligned with 32 bytes |
| ; known dereferenceable. The CHECK lines expect the load to be left as-is. |
| define <6 x float> @vec_with_6elts_256bits(<6 x float>* align 32 dereferenceable(32) %p) { |
| ; CHECK-LABEL: @vec_with_6elts_256bits( |
| ; CHECK-NEXT: [[R:%.*]] = load <6 x float>, <6 x float>* [[P:%.*]], align 32 |
| ; CHECK-NEXT: ret <6 x float> [[R]] |
| ; |
| %r = load <6 x float>, <6 x float>* %p, align 32 |
| ret <6 x float> %r |
| } |
| |
| ; 224-bit (<7 x float>) load from a pointer that is 32-byte aligned with 32 bytes |
| ; known dereferenceable. The CHECK lines expect the load to be left as-is. |
| define <7 x float> @vec_with_7elts_256bits(<7 x float>* align 32 dereferenceable(32) %p) { |
| ; CHECK-LABEL: @vec_with_7elts_256bits( |
| ; CHECK-NEXT: [[R:%.*]] = load <7 x float>, <7 x float>* [[P:%.*]], align 32 |
| ; CHECK-NEXT: ret <7 x float> [[R]] |
| ; |
| %r = load <7 x float>, <7 x float>* %p, align 32 |
| ret <7 x float> %r |
| } |
| |
| ; Full-vector load. All good already. |
| ; <8 x float> is exactly 256 bits, matching the 32 dereferenceable bytes, so |
| ; there is nothing to widen; the CHECK lines expect the load unchanged. |
| define <8 x float> @vec_with_8elts_256bits(<8 x float>* align 32 dereferenceable(32) %p) { |
| ; CHECK-LABEL: @vec_with_8elts_256bits( |
| ; CHECK-NEXT: [[R:%.*]] = load <8 x float>, <8 x float>* [[P:%.*]], align 32 |
| ; CHECK-NEXT: ret <8 x float> [[R]] |
| ; |
| %r = load <8 x float>, <8 x float>* %p, align 32 |
| ret <8 x float> %r |
| } |
| |
| ; We can't tell if we can load more than 256 bits. |
| ; The <9 x float> load reads 36 bytes, but the parameter attributes only |
| ; guarantee 32 dereferenceable bytes. The CHECK lines expect the load unchanged. |
| define <9 x float> @vec_with_9elts_256bits(<9 x float>* align 32 dereferenceable(32) %p) { |
| ; CHECK-LABEL: @vec_with_9elts_256bits( |
| ; CHECK-NEXT: [[R:%.*]] = load <9 x float>, <9 x float>* [[P:%.*]], align 32 |
| ; CHECK-NEXT: ret <9 x float> [[R]] |
| ; |
| %r = load <9 x float>, <9 x float>* %p, align 32 |
| ret <9 x float> %r |
| } |
| |
| ;------------------------------------------------------------------------------- |
| |
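| ; Weird types we don't deal with: sub-byte, non-byte-sized and |
| ; non-power-of-two element widths, and a non-default address space. |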
| ; Elements narrower than a byte (i7). The CHECK lines expect the <2 x i7> load |
| ; to be left as-is. |
| define <2 x i7> @vec_with_two_subbyte_elts(<2 x i7>* align 16 dereferenceable(16) %p) { |
| ; CHECK-LABEL: @vec_with_two_subbyte_elts( |
| ; CHECK-NEXT: [[R:%.*]] = load <2 x i7>, <2 x i7>* [[P:%.*]], align 16 |
| ; CHECK-NEXT: ret <2 x i7> [[R]] |
| ; |
| %r = load <2 x i7>, <2 x i7>* %p, align 16 |
| ret <2 x i7> %r |
| } |
| |
| ; Elements whose width (i9) is not a multiple of 8 bits. The CHECK lines expect |
| ; the <2 x i9> load to be left as-is. |
| define <2 x i9> @vec_with_two_nonbyte_sized_elts(<2 x i9>* align 16 dereferenceable(16) %p) { |
| ; CHECK-LABEL: @vec_with_two_nonbyte_sized_elts( |
| ; CHECK-NEXT: [[R:%.*]] = load <2 x i9>, <2 x i9>* [[P:%.*]], align 16 |
| ; CHECK-NEXT: ret <2 x i9> [[R]] |
| ; |
| %r = load <2 x i9>, <2 x i9>* %p, align 16 |
| ret <2 x i9> %r |
| } |
| |
| ; Byte-sized elements whose width (i24) is not a power of two. The CHECK lines |
| ; expect the <2 x i24> load to be left as-is. |
| define <2 x i24> @vec_with_two_nonpoweroftwo_sized_elts(<2 x i24>* align 16 dereferenceable(16) %p) { |
| ; CHECK-LABEL: @vec_with_two_nonpoweroftwo_sized_elts( |
| ; CHECK-NEXT: [[R:%.*]] = load <2 x i24>, <2 x i24>* [[P:%.*]], align 16 |
| ; CHECK-NEXT: ret <2 x i24> [[R]] |
| ; |
| %r = load <2 x i24>, <2 x i24>* %p, align 16 |
| ret <2 x i24> %r |
| } |
| |
| ; Same shape as a plain <2 x float> load, but the pointer lives in address |
| ; space 2. The CHECK lines expect the load, including its addrspace(2) pointer |
| ; type, to be left as-is. |
| define <2 x float> @vec_with_2elts_addressspace(<2 x float> addrspace(2)* align 16 dereferenceable(16) %p) { |
| ; CHECK-LABEL: @vec_with_2elts_addressspace( |
| ; CHECK-NEXT: [[R:%.*]] = load <2 x float>, <2 x float> addrspace(2)* [[P:%.*]], align 16 |
| ; CHECK-NEXT: ret <2 x float> [[R]] |
| ; |
| %r = load <2 x float>, <2 x float> addrspace(2)* %p, align 16 |
| ret <2 x float> %r |
| } |
| |
| ;------------------------------------------------------------------------------- |
| |
| ; Widening these would change the legalized type, so leave them alone. |
| |
| ; Two i1 elements; the pointer is 16-byte aligned with 16 bytes known |
| ; dereferenceable. The CHECK lines expect the <2 x i1> load to be left as-is. |
| define <2 x i1> @vec_with_2elts_128bits_i1(<2 x i1>* align 16 dereferenceable(16) %p) { |
| ; CHECK-LABEL: @vec_with_2elts_128bits_i1( |
| ; CHECK-NEXT: [[R:%.*]] = load <2 x i1>, <2 x i1>* [[P:%.*]], align 16 |
| ; CHECK-NEXT: ret <2 x i1> [[R]] |
| ; |
| %r = load <2 x i1>, <2 x i1>* %p, align 16 |
| ret <2 x i1> %r |
| } |
| ; Two i2 elements; the pointer is 16-byte aligned with 16 bytes known |
| ; dereferenceable. The CHECK lines expect the <2 x i2> load to be left as-is. |
| define <2 x i2> @vec_with_2elts_128bits_i2(<2 x i2>* align 16 dereferenceable(16) %p) { |
| ; CHECK-LABEL: @vec_with_2elts_128bits_i2( |
| ; CHECK-NEXT: [[R:%.*]] = load <2 x i2>, <2 x i2>* [[P:%.*]], align 16 |
| ; CHECK-NEXT: ret <2 x i2> [[R]] |
| ; |
| %r = load <2 x i2>, <2 x i2>* %p, align 16 |
| ret <2 x i2> %r |
| } |
| ; Two i4 elements; the pointer is 16-byte aligned with 16 bytes known |
| ; dereferenceable. The CHECK lines expect the <2 x i4> load to be left as-is. |
| define <2 x i4> @vec_with_2elts_128bits_i4(<2 x i4>* align 16 dereferenceable(16) %p) { |
| ; CHECK-LABEL: @vec_with_2elts_128bits_i4( |
| ; CHECK-NEXT: [[R:%.*]] = load <2 x i4>, <2 x i4>* [[P:%.*]], align 16 |
| ; CHECK-NEXT: ret <2 x i4> [[R]] |
| ; |
| %r = load <2 x i4>, <2 x i4>* %p, align 16 |
| ret <2 x i4> %r |
| } |