| ; RUN: opt -mtriple=nvptx64-nvidia-cuda -passes=load-store-vectorizer -S -o - %s | FileCheck %s |
| |
| ; Check that the load/store vectorizer is willing to move loads/stores across |
| ; intervening instructions only if it's safe. |
| ; |
| ; - Loads can be moved across instructions that don't write or throw. |
| ; - Stores can only be moved across instructions which don't read, write, or |
| ; throw. |
| |
| ; External callees placed between the two memory accesses of each test. |
| ; The #N suffix selects one of the attribute groups defined in this file. |
| |
| ; Callees whose attribute group includes nounwind and willreturn. |
| declare void @fn_nounwind() #0 |
| declare void @fn_nounwind_writeonly() #1 |
| declare void @fn_nounwind_readonly() #2 |
| declare void @fn_readnone() #5 |
| |
| ; Callees declared without nounwind or willreturn. |
| declare void @fn() |
| declare void @fn_writeonly() #3 |
| declare void @fn_readonly() #4 |
| |
| ; Two adjacent i32 loads with a call to @fn, which carries no attributes, in |
| ; between. The test expects both scalar loads to remain, one on each side of |
| ; the call. |
| ; CHECK-LABEL: @load_fn |
| ; CHECK: load |
| ; CHECK: call void @fn() |
| ; CHECK: load |
| define void @load_fn(ptr %p) #0 { |
| ; Address of the i32 element immediately after %p. |
| %p.next = getelementptr i32, ptr %p, i32 1 |
| |
| %elt0 = load i32, ptr %p, align 8 |
| call void @fn() |
| %elt1 = load i32, ptr %p.next, align 4 |
| ret void |
| } |
| |
| ; The callee is nounwind and willreturn but has no memory attribute, so it may |
| ; still write memory. The test expects the two loads to stay scalar around it. |
| ; CHECK-LABEL: @load_fn_nounwind |
| ; CHECK: load |
| ; CHECK: call void @fn_nounwind() |
| ; CHECK: load |
| define void @load_fn_nounwind(ptr %p) #0 { |
| ; Address of the i32 element immediately after %p. |
| %p.next = getelementptr i32, ptr %p, i32 1 |
| |
| %elt0 = load i32, ptr %p, align 8 |
| call void @fn_nounwind() #0 |
| %elt1 = load i32, ptr %p.next, align 4 |
| ret void |
| } |
| |
| ; The callee cannot unwind but is writeonly, i.e. it may write memory. The test |
| ; expects the two loads to stay scalar, one on each side of the call. |
| ; CHECK-LABEL: @load_fn_nounwind_writeonly |
| ; CHECK: load |
| ; CHECK: call void @fn_nounwind_writeonly() |
| ; CHECK: load |
| define void @load_fn_nounwind_writeonly(ptr %p) #0 { |
| ; Address of the i32 element immediately after %p. |
| %p.next = getelementptr i32, ptr %p, i32 1 |
| |
| %elt0 = load i32, ptr %p, align 8 |
| call void @fn_nounwind_writeonly() #1 |
| %elt1 = load i32, ptr %p.next, align 4 |
| ret void |
| } |
| |
| ; The callee neither writes memory nor unwinds, so the test expects the two |
| ; loads to be merged into a single <2 x i32> load. The DAG checks accept the |
| ; vector load on either side of the call. |
| ; CHECK-LABEL: @load_fn_nounwind_readonly |
| ; CHECK-DAG: load <2 x i32> |
| ; CHECK-DAG: call void @fn_nounwind_readonly() |
| define void @load_fn_nounwind_readonly(ptr %p) #0 { |
| ; Address of the i32 element immediately after %p. |
| %p.next = getelementptr i32, ptr %p, i32 1 |
| |
| %elt0 = load i32, ptr %p, align 8 |
| call void @fn_nounwind_readonly() #2 |
| %elt1 = load i32, ptr %p.next, align 4 |
| ret void |
| } |
| |
| ; The callee is readonly but not nounwind, so it may still throw. The test |
| ; expects the two loads to stay scalar, one on each side of the call. |
| ; The call pattern spells out the empty argument list, like the other tests in |
| ; this file, so it cannot match a callee whose name merely starts with |
| ; @fn_readonly. |
| ; CHECK-LABEL: @load_fn_readonly |
| ; CHECK: load |
| ; CHECK: call void @fn_readonly() |
| ; CHECK: load |
| define void @load_fn_readonly(ptr %p) #0 { |
| ; Address of the i32 element immediately after %p. |
| %p.1 = getelementptr i32, ptr %p, i32 1 |
| |
| %v0 = load i32, ptr %p, align 8 |
| call void @fn_readonly() #4 |
| %v1 = load i32, ptr %p.1, align 4 |
| ret void |
| } |
| |
| ; The callee is writeonly and not nounwind: it may write memory and may throw. |
| ; The test expects the two loads to stay scalar around the call. |
| ; CHECK-LABEL: @load_fn_writeonly |
| ; CHECK: load |
| ; CHECK: call void @fn_writeonly() |
| ; CHECK: load |
| define void @load_fn_writeonly(ptr %p) #0 { |
| ; Address of the i32 element immediately after %p. |
| %p.next = getelementptr i32, ptr %p, i32 1 |
| |
| %elt0 = load i32, ptr %p, align 8 |
| call void @fn_writeonly() #3 |
| %elt1 = load i32, ptr %p.next, align 4 |
| ret void |
| } |
| |
| ; The callee is readnone, nounwind and willreturn, so the test expects the two |
| ; loads to be merged into a single <2 x i32> load. The DAG checks accept the |
| ; vector load on either side of the call. |
| ; CHECK-LABEL: @load_fn_readnone |
| ; CHECK-DAG: load <2 x i32> |
| ; CHECK-DAG: call void @fn_readnone() |
| define void @load_fn_readnone(ptr %p) #0 { |
| ; Address of the i32 element immediately after %p. |
| %p.next = getelementptr i32, ptr %p, i32 1 |
| |
| %elt0 = load i32, ptr %p, align 8 |
| call void @fn_readnone() #5 |
| %elt1 = load i32, ptr %p.next, align 4 |
| ret void |
| } |
| |
| ; ------------------------------------------------ |
| ; Same tests, but now for stores instead of loads. |
| ; ------------------------------------------------ |
| |
| ; Two adjacent i32 stores with a call to @fn, which carries no attributes, in |
| ; between. The test expects both scalar stores to remain, one on each side of |
| ; the call. |
| ; CHECK-LABEL: @store_fn |
| ; CHECK: store |
| ; CHECK: call void @fn() |
| ; CHECK: store |
| define void @store_fn(ptr %p) #0 { |
| ; Address of the i32 element immediately after %p. |
| %p.1 = getelementptr i32, ptr %p, i32 1 |
| |
| ; The first store is align 8, as in the load tests and @store_fn_readnone, so |
| ; that insufficient alignment cannot be what keeps the two stores apart. |
| store i32 0, ptr %p, align 8 |
| call void @fn() |
| store i32 0, ptr %p.1, align 4 |
| ret void |
| } |
| |
| ; The callee is nounwind and willreturn but has no memory attribute, so it may |
| ; read or write memory. The test expects the two stores to stay scalar. |
| ; CHECK-LABEL: @store_fn_nounwind |
| ; CHECK: store |
| ; CHECK: call void @fn_nounwind() |
| ; CHECK: store |
| define void @store_fn_nounwind(ptr %p) #0 { |
| ; Address of the i32 element immediately after %p. |
| %p.1 = getelementptr i32, ptr %p, i32 1 |
| |
| ; The first store is align 8, as in the load tests and @store_fn_readnone, so |
| ; that insufficient alignment cannot be what keeps the two stores apart. |
| store i32 0, ptr %p, align 8 |
| call void @fn_nounwind() #0 |
| store i32 0, ptr %p.1, align 4 |
| ret void |
| } |
| |
| ; The callee cannot unwind but is writeonly, i.e. it may write memory. The test |
| ; expects the two stores to stay scalar, one on each side of the call. |
| ; CHECK-LABEL: @store_fn_nounwind_writeonly |
| ; CHECK: store |
| ; CHECK: call void @fn_nounwind_writeonly() |
| ; CHECK: store |
| define void @store_fn_nounwind_writeonly(ptr %p) #0 { |
| ; Address of the i32 element immediately after %p. |
| %p.1 = getelementptr i32, ptr %p, i32 1 |
| |
| ; The first store is align 8, as in the load tests and @store_fn_readnone, so |
| ; that insufficient alignment cannot be what keeps the two stores apart. |
| store i32 0, ptr %p, align 8 |
| call void @fn_nounwind_writeonly() #1 |
| store i32 0, ptr %p.1, align 4 |
| ret void |
| } |
| |
| ; The callee cannot unwind but is readonly, i.e. it may read memory. Stores may |
| ; not be moved across reads, so the test expects the two stores to stay scalar. |
| ; CHECK-LABEL: @store_fn_nounwind_readonly |
| ; CHECK: store |
| ; CHECK: call void @fn_nounwind_readonly() |
| ; CHECK: store |
| define void @store_fn_nounwind_readonly(ptr %p) #0 { |
| ; Address of the i32 element immediately after %p. |
| %p.1 = getelementptr i32, ptr %p, i32 1 |
| |
| ; The first store is align 8, as in the load tests and @store_fn_readnone, so |
| ; that insufficient alignment cannot be what keeps the two stores apart. |
| store i32 0, ptr %p, align 8 |
| call void @fn_nounwind_readonly() #2 |
| store i32 0, ptr %p.1, align 4 |
| ret void |
| } |
| |
| ; The callee is readonly and not nounwind: it may read memory and may throw. |
| ; The test expects the two stores to stay scalar around the call. |
| ; The call pattern spells out the empty argument list, like the other tests in |
| ; this file, so it cannot match a callee whose name merely starts with |
| ; @fn_readonly. |
| ; CHECK-LABEL: @store_fn_readonly |
| ; CHECK: store |
| ; CHECK: call void @fn_readonly() |
| ; CHECK: store |
| define void @store_fn_readonly(ptr %p) #0 { |
| ; Address of the i32 element immediately after %p. |
| %p.1 = getelementptr i32, ptr %p, i32 1 |
| |
| ; The first store is align 8, as in the load tests and @store_fn_readnone, so |
| ; that insufficient alignment cannot be what keeps the two stores apart. |
| store i32 0, ptr %p, align 8 |
| call void @fn_readonly() #4 |
| store i32 0, ptr %p.1, align 4 |
| ret void |
| } |
| |
| ; The callee is writeonly and not nounwind: it may write memory and may throw. |
| ; The test expects the two stores to stay scalar around the call. |
| ; CHECK-LABEL: @store_fn_writeonly |
| ; CHECK: store |
| ; CHECK: call void @fn_writeonly() |
| ; CHECK: store |
| define void @store_fn_writeonly(ptr %p) #0 { |
| ; Address of the i32 element immediately after %p. |
| %p.1 = getelementptr i32, ptr %p, i32 1 |
| |
| ; The first store is align 8, as in the load tests and @store_fn_readnone, so |
| ; that insufficient alignment cannot be what keeps the two stores apart. |
| store i32 0, ptr %p, align 8 |
| call void @fn_writeonly() #3 |
| store i32 0, ptr %p.1, align 4 |
| ret void |
| } |
| |
| ; This is the only store idiom we can vectorize. |
| ; The callee is readnone, nounwind and willreturn, so the test expects the two |
| ; stores to be merged into a single <2 x i32> store. The DAG checks accept the |
| ; vector store on either side of the call. |
| ; CHECK-LABEL: @store_fn_readnone |
| ; CHECK-DAG: store <2 x i32> |
| ; CHECK-DAG: call void @fn_readnone() |
| define void @store_fn_readnone(ptr %p) #0 { |
| ; Address of the i32 element immediately after %p. |
| %p.1 = getelementptr i32, ptr %p, i32 1 |
| |
| ; %p.1 is %p plus 4 bytes, so it cannot share %p's 8-byte alignment; the second |
| ; store therefore uses align 4, as the second access does in the load tests. |
| store i32 0, ptr %p, align 8 |
| call void @fn_readnone() #5 |
| store i32 0, ptr %p.1, align 4 |
| ret void |
| } |
| |
| |
| ; Attribute groups referenced by the declarations, call sites and test |
| ; functions above. Each group lists its memory attribute first, if it has one. |
| ; #0 is also attached to every test function above. |
| attributes #0 = { nounwind willreturn } |
| attributes #1 = { writeonly nounwind willreturn } |
| attributes #2 = { readonly nounwind willreturn } |
| attributes #3 = { writeonly } |
| attributes #4 = { readonly } |
| ; readnone is only tested in combination with nounwind (and willreturn); no |
| ; separate may-unwind readnone case is included. |
| attributes #5 = { readnone nounwind willreturn } |