| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py |
| ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -slp-vectorizer -instcombine %s | FileCheck -check-prefixes=GCN,GFX7 %s |
| ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -slp-vectorizer -instcombine %s | FileCheck -check-prefixes=GCN,GFX8 %s |
| ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -slp-vectorizer -instcombine %s | FileCheck -check-prefixes=GCN,GFX8 %s |
| |
| ; Lane-wise llvm.uadd.sat.i16 on <2 x i16> operands: both lanes are extracted, |
| ; combined with the scalar intrinsic, and reinserted into a poison-based vector. |
| ; Expected output: the GFX7 assertions keep the scalar calls as written, while the |
| ; GFX8 assertions expect a single llvm.uadd.sat.v2i16 call on the whole arguments. |
| define <2 x i16> @uadd_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) { |
| ; GFX7-LABEL: @uadd_sat_v2i16( |
| ; GFX7-NEXT: bb: |
| ; GFX7-NEXT: [[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0 |
| ; GFX7-NEXT: [[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1 |
| ; GFX7-NEXT: [[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0 |
| ; GFX7-NEXT: [[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1 |
| ; GFX7-NEXT: [[ADD_0:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_0]], i16 [[ARG1_0]]) |
| ; GFX7-NEXT: [[ADD_1:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_1]], i16 [[ARG1_1]]) |
| ; GFX7-NEXT: [[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD_0]], i64 0 |
| ; GFX7-NEXT: [[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[ADD_1]], i64 1 |
| ; GFX7-NEXT: ret <2 x i16> [[INS_1]] |
| ; |
| ; GFX8-LABEL: @uadd_sat_v2i16( |
| ; GFX8-NEXT: bb: |
| ; GFX8-NEXT: [[TMP0:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]]) |
| ; GFX8-NEXT: ret <2 x i16> [[TMP0]] |
| ; |
| bb: |
| %arg0.0 = extractelement <2 x i16> %arg0, i64 0 |
| %arg0.1 = extractelement <2 x i16> %arg0, i64 1 |
| %arg1.0 = extractelement <2 x i16> %arg1, i64 0 |
| %arg1.1 = extractelement <2 x i16> %arg1, i64 1 |
| %add.0 = call i16 @llvm.uadd.sat.i16(i16 %arg0.0, i16 %arg1.0) |
| %add.1 = call i16 @llvm.uadd.sat.i16(i16 %arg0.1, i16 %arg1.1) |
| %ins.0 = insertelement <2 x i16> poison, i16 %add.0, i64 0 |
| %ins.1 = insertelement <2 x i16> %ins.0, i16 %add.1, i64 1 |
| ret <2 x i16> %ins.1 |
| } |
| |
| ; Lane-wise llvm.usub.sat.i16 on <2 x i16> operands, rebuilt with insertelement |
| ; into a poison-based vector. The per-lane results are named %add.N even though |
| ; they hold usub.sat values. |
| ; Expected output: the GFX7 assertions keep the scalar calls as written, while the |
| ; GFX8 assertions expect a single llvm.usub.sat.v2i16 call on the whole arguments. |
| define <2 x i16> @usub_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) { |
| ; GFX7-LABEL: @usub_sat_v2i16( |
| ; GFX7-NEXT: bb: |
| ; GFX7-NEXT: [[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0 |
| ; GFX7-NEXT: [[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1 |
| ; GFX7-NEXT: [[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0 |
| ; GFX7-NEXT: [[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1 |
| ; GFX7-NEXT: [[ADD_0:%.*]] = call i16 @llvm.usub.sat.i16(i16 [[ARG0_0]], i16 [[ARG1_0]]) |
| ; GFX7-NEXT: [[ADD_1:%.*]] = call i16 @llvm.usub.sat.i16(i16 [[ARG0_1]], i16 [[ARG1_1]]) |
| ; GFX7-NEXT: [[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD_0]], i64 0 |
| ; GFX7-NEXT: [[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[ADD_1]], i64 1 |
| ; GFX7-NEXT: ret <2 x i16> [[INS_1]] |
| ; |
| ; GFX8-LABEL: @usub_sat_v2i16( |
| ; GFX8-NEXT: bb: |
| ; GFX8-NEXT: [[TMP0:%.*]] = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]]) |
| ; GFX8-NEXT: ret <2 x i16> [[TMP0]] |
| ; |
| bb: |
| %arg0.0 = extractelement <2 x i16> %arg0, i64 0 |
| %arg0.1 = extractelement <2 x i16> %arg0, i64 1 |
| %arg1.0 = extractelement <2 x i16> %arg1, i64 0 |
| %arg1.1 = extractelement <2 x i16> %arg1, i64 1 |
| %add.0 = call i16 @llvm.usub.sat.i16(i16 %arg0.0, i16 %arg1.0) |
| %add.1 = call i16 @llvm.usub.sat.i16(i16 %arg0.1, i16 %arg1.1) |
| %ins.0 = insertelement <2 x i16> poison, i16 %add.0, i64 0 |
| %ins.1 = insertelement <2 x i16> %ins.0, i16 %add.1, i64 1 |
| ret <2 x i16> %ins.1 |
| } |
| |
| ; Lane-wise llvm.sadd.sat.i16 on <2 x i16> operands: both lanes are extracted, |
| ; combined with the scalar intrinsic, and reinserted into a poison-based vector. |
| ; Expected output: the GFX7 assertions keep the scalar calls as written, while the |
| ; GFX8 assertions expect a single llvm.sadd.sat.v2i16 call on the whole arguments. |
| define <2 x i16> @sadd_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) { |
| ; GFX7-LABEL: @sadd_sat_v2i16( |
| ; GFX7-NEXT: bb: |
| ; GFX7-NEXT: [[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0 |
| ; GFX7-NEXT: [[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1 |
| ; GFX7-NEXT: [[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0 |
| ; GFX7-NEXT: [[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1 |
| ; GFX7-NEXT: [[ADD_0:%.*]] = call i16 @llvm.sadd.sat.i16(i16 [[ARG0_0]], i16 [[ARG1_0]]) |
| ; GFX7-NEXT: [[ADD_1:%.*]] = call i16 @llvm.sadd.sat.i16(i16 [[ARG0_1]], i16 [[ARG1_1]]) |
| ; GFX7-NEXT: [[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD_0]], i64 0 |
| ; GFX7-NEXT: [[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[ADD_1]], i64 1 |
| ; GFX7-NEXT: ret <2 x i16> [[INS_1]] |
| ; |
| ; GFX8-LABEL: @sadd_sat_v2i16( |
| ; GFX8-NEXT: bb: |
| ; GFX8-NEXT: [[TMP0:%.*]] = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]]) |
| ; GFX8-NEXT: ret <2 x i16> [[TMP0]] |
| ; |
| bb: |
| %arg0.0 = extractelement <2 x i16> %arg0, i64 0 |
| %arg0.1 = extractelement <2 x i16> %arg0, i64 1 |
| %arg1.0 = extractelement <2 x i16> %arg1, i64 0 |
| %arg1.1 = extractelement <2 x i16> %arg1, i64 1 |
| %add.0 = call i16 @llvm.sadd.sat.i16(i16 %arg0.0, i16 %arg1.0) |
| %add.1 = call i16 @llvm.sadd.sat.i16(i16 %arg0.1, i16 %arg1.1) |
| %ins.0 = insertelement <2 x i16> poison, i16 %add.0, i64 0 |
| %ins.1 = insertelement <2 x i16> %ins.0, i16 %add.1, i64 1 |
| ret <2 x i16> %ins.1 |
| } |
| |
| ; Lane-wise llvm.ssub.sat.i16 on <2 x i16> operands, rebuilt with insertelement |
| ; into a poison-based vector. The per-lane results are named %add.N even though |
| ; they hold ssub.sat values. |
| ; Expected output: the GFX7 assertions keep the scalar calls as written, while the |
| ; GFX8 assertions expect a single llvm.ssub.sat.v2i16 call on the whole arguments. |
| define <2 x i16> @ssub_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) { |
| ; GFX7-LABEL: @ssub_sat_v2i16( |
| ; GFX7-NEXT: bb: |
| ; GFX7-NEXT: [[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0 |
| ; GFX7-NEXT: [[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1 |
| ; GFX7-NEXT: [[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0 |
| ; GFX7-NEXT: [[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1 |
| ; GFX7-NEXT: [[ADD_0:%.*]] = call i16 @llvm.ssub.sat.i16(i16 [[ARG0_0]], i16 [[ARG1_0]]) |
| ; GFX7-NEXT: [[ADD_1:%.*]] = call i16 @llvm.ssub.sat.i16(i16 [[ARG0_1]], i16 [[ARG1_1]]) |
| ; GFX7-NEXT: [[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD_0]], i64 0 |
| ; GFX7-NEXT: [[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[ADD_1]], i64 1 |
| ; GFX7-NEXT: ret <2 x i16> [[INS_1]] |
| ; |
| ; GFX8-LABEL: @ssub_sat_v2i16( |
| ; GFX8-NEXT: bb: |
| ; GFX8-NEXT: [[TMP0:%.*]] = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]]) |
| ; GFX8-NEXT: ret <2 x i16> [[TMP0]] |
| ; |
| bb: |
| %arg0.0 = extractelement <2 x i16> %arg0, i64 0 |
| %arg0.1 = extractelement <2 x i16> %arg0, i64 1 |
| %arg1.0 = extractelement <2 x i16> %arg1, i64 0 |
| %arg1.1 = extractelement <2 x i16> %arg1, i64 1 |
| %add.0 = call i16 @llvm.ssub.sat.i16(i16 %arg0.0, i16 %arg1.0) |
| %add.1 = call i16 @llvm.ssub.sat.i16(i16 %arg0.1, i16 %arg1.1) |
| %ins.0 = insertelement <2 x i16> poison, i16 %add.0, i64 0 |
| %ins.1 = insertelement <2 x i16> %ins.0, i16 %add.1, i64 1 |
| ret <2 x i16> %ins.1 |
| } |
| |
| ; Lane-wise llvm.uadd.sat.i32 on <2 x i32> operands, rebuilt with insertelement |
| ; into a poison-based vector. |
| ; Expected output: the assertions use the shared GCN prefix, so every RUN line |
| ; expects the two scalar i32 calls to remain exactly as written (no vector call). |
| define <2 x i32> @uadd_sat_v2i32(<2 x i32> %arg0, <2 x i32> %arg1) { |
| ; GCN-LABEL: @uadd_sat_v2i32( |
| ; GCN-NEXT: bb: |
| ; GCN-NEXT: [[ARG0_0:%.*]] = extractelement <2 x i32> [[ARG0:%.*]], i64 0 |
| ; GCN-NEXT: [[ARG0_1:%.*]] = extractelement <2 x i32> [[ARG0]], i64 1 |
| ; GCN-NEXT: [[ARG1_0:%.*]] = extractelement <2 x i32> [[ARG1:%.*]], i64 0 |
| ; GCN-NEXT: [[ARG1_1:%.*]] = extractelement <2 x i32> [[ARG1]], i64 1 |
| ; GCN-NEXT: [[ADD_0:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[ARG0_0]], i32 [[ARG1_0]]) |
| ; GCN-NEXT: [[ADD_1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[ARG0_1]], i32 [[ARG1_1]]) |
| ; GCN-NEXT: [[INS_0:%.*]] = insertelement <2 x i32> poison, i32 [[ADD_0]], i64 0 |
| ; GCN-NEXT: [[INS_1:%.*]] = insertelement <2 x i32> [[INS_0]], i32 [[ADD_1]], i64 1 |
| ; GCN-NEXT: ret <2 x i32> [[INS_1]] |
| ; |
| bb: |
| %arg0.0 = extractelement <2 x i32> %arg0, i64 0 |
| %arg0.1 = extractelement <2 x i32> %arg0, i64 1 |
| %arg1.0 = extractelement <2 x i32> %arg1, i64 0 |
| %arg1.1 = extractelement <2 x i32> %arg1, i64 1 |
| %add.0 = call i32 @llvm.uadd.sat.i32(i32 %arg0.0, i32 %arg1.0) |
| %add.1 = call i32 @llvm.uadd.sat.i32(i32 %arg0.1, i32 %arg1.1) |
| %ins.0 = insertelement <2 x i32> poison, i32 %add.0, i64 0 |
| %ins.1 = insertelement <2 x i32> %ins.0, i32 %add.1, i64 1 |
| ret <2 x i32> %ins.1 |
| } |
| |
| ; Lane-wise llvm.usub.sat.i32 on <2 x i32> operands, rebuilt with insertelement |
| ; into a poison-based vector. The per-lane results are named %add.N even though |
| ; they hold usub.sat values. |
| ; Expected output: the assertions use the shared GCN prefix, so every RUN line |
| ; expects the two scalar i32 calls to remain exactly as written (no vector call). |
| define <2 x i32> @usub_sat_v2i32(<2 x i32> %arg0, <2 x i32> %arg1) { |
| ; GCN-LABEL: @usub_sat_v2i32( |
| ; GCN-NEXT: bb: |
| ; GCN-NEXT: [[ARG0_0:%.*]] = extractelement <2 x i32> [[ARG0:%.*]], i64 0 |
| ; GCN-NEXT: [[ARG0_1:%.*]] = extractelement <2 x i32> [[ARG0]], i64 1 |
| ; GCN-NEXT: [[ARG1_0:%.*]] = extractelement <2 x i32> [[ARG1:%.*]], i64 0 |
| ; GCN-NEXT: [[ARG1_1:%.*]] = extractelement <2 x i32> [[ARG1]], i64 1 |
| ; GCN-NEXT: [[ADD_0:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[ARG0_0]], i32 [[ARG1_0]]) |
| ; GCN-NEXT: [[ADD_1:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[ARG0_1]], i32 [[ARG1_1]]) |
| ; GCN-NEXT: [[INS_0:%.*]] = insertelement <2 x i32> poison, i32 [[ADD_0]], i64 0 |
| ; GCN-NEXT: [[INS_1:%.*]] = insertelement <2 x i32> [[INS_0]], i32 [[ADD_1]], i64 1 |
| ; GCN-NEXT: ret <2 x i32> [[INS_1]] |
| ; |
| bb: |
| %arg0.0 = extractelement <2 x i32> %arg0, i64 0 |
| %arg0.1 = extractelement <2 x i32> %arg0, i64 1 |
| %arg1.0 = extractelement <2 x i32> %arg1, i64 0 |
| %arg1.1 = extractelement <2 x i32> %arg1, i64 1 |
| %add.0 = call i32 @llvm.usub.sat.i32(i32 %arg0.0, i32 %arg1.0) |
| %add.1 = call i32 @llvm.usub.sat.i32(i32 %arg0.1, i32 %arg1.1) |
| %ins.0 = insertelement <2 x i32> poison, i32 %add.0, i64 0 |
| %ins.1 = insertelement <2 x i32> %ins.0, i32 %add.1, i64 1 |
| ret <2 x i32> %ins.1 |
| } |
| |
| ; Lane-wise llvm.sadd.sat.i32 on <2 x i32> operands, rebuilt with insertelement |
| ; into a poison-based vector. |
| ; Expected output: the assertions use the shared GCN prefix, so every RUN line |
| ; expects the two scalar i32 calls to remain exactly as written (no vector call). |
| define <2 x i32> @sadd_sat_v2i32(<2 x i32> %arg0, <2 x i32> %arg1) { |
| ; GCN-LABEL: @sadd_sat_v2i32( |
| ; GCN-NEXT: bb: |
| ; GCN-NEXT: [[ARG0_0:%.*]] = extractelement <2 x i32> [[ARG0:%.*]], i64 0 |
| ; GCN-NEXT: [[ARG0_1:%.*]] = extractelement <2 x i32> [[ARG0]], i64 1 |
| ; GCN-NEXT: [[ARG1_0:%.*]] = extractelement <2 x i32> [[ARG1:%.*]], i64 0 |
| ; GCN-NEXT: [[ARG1_1:%.*]] = extractelement <2 x i32> [[ARG1]], i64 1 |
| ; GCN-NEXT: [[ADD_0:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[ARG0_0]], i32 [[ARG1_0]]) |
| ; GCN-NEXT: [[ADD_1:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[ARG0_1]], i32 [[ARG1_1]]) |
| ; GCN-NEXT: [[INS_0:%.*]] = insertelement <2 x i32> poison, i32 [[ADD_0]], i64 0 |
| ; GCN-NEXT: [[INS_1:%.*]] = insertelement <2 x i32> [[INS_0]], i32 [[ADD_1]], i64 1 |
| ; GCN-NEXT: ret <2 x i32> [[INS_1]] |
| ; |
| bb: |
| %arg0.0 = extractelement <2 x i32> %arg0, i64 0 |
| %arg0.1 = extractelement <2 x i32> %arg0, i64 1 |
| %arg1.0 = extractelement <2 x i32> %arg1, i64 0 |
| %arg1.1 = extractelement <2 x i32> %arg1, i64 1 |
| %add.0 = call i32 @llvm.sadd.sat.i32(i32 %arg0.0, i32 %arg1.0) |
| %add.1 = call i32 @llvm.sadd.sat.i32(i32 %arg0.1, i32 %arg1.1) |
| %ins.0 = insertelement <2 x i32> poison, i32 %add.0, i64 0 |
| %ins.1 = insertelement <2 x i32> %ins.0, i32 %add.1, i64 1 |
| ret <2 x i32> %ins.1 |
| } |
| |
| ; Lane-wise llvm.ssub.sat.i32 on <2 x i32> operands, rebuilt with insertelement |
| ; into a poison-based vector. The per-lane results are named %add.N even though |
| ; they hold ssub.sat values. |
| ; Expected output: the assertions use the shared GCN prefix, so every RUN line |
| ; expects the two scalar i32 calls to remain exactly as written (no vector call). |
| define <2 x i32> @ssub_sat_v2i32(<2 x i32> %arg0, <2 x i32> %arg1) { |
| ; GCN-LABEL: @ssub_sat_v2i32( |
| ; GCN-NEXT: bb: |
| ; GCN-NEXT: [[ARG0_0:%.*]] = extractelement <2 x i32> [[ARG0:%.*]], i64 0 |
| ; GCN-NEXT: [[ARG0_1:%.*]] = extractelement <2 x i32> [[ARG0]], i64 1 |
| ; GCN-NEXT: [[ARG1_0:%.*]] = extractelement <2 x i32> [[ARG1:%.*]], i64 0 |
| ; GCN-NEXT: [[ARG1_1:%.*]] = extractelement <2 x i32> [[ARG1]], i64 1 |
| ; GCN-NEXT: [[ADD_0:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[ARG0_0]], i32 [[ARG1_0]]) |
| ; GCN-NEXT: [[ADD_1:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[ARG0_1]], i32 [[ARG1_1]]) |
| ; GCN-NEXT: [[INS_0:%.*]] = insertelement <2 x i32> poison, i32 [[ADD_0]], i64 0 |
| ; GCN-NEXT: [[INS_1:%.*]] = insertelement <2 x i32> [[INS_0]], i32 [[ADD_1]], i64 1 |
| ; GCN-NEXT: ret <2 x i32> [[INS_1]] |
| ; |
| bb: |
| %arg0.0 = extractelement <2 x i32> %arg0, i64 0 |
| %arg0.1 = extractelement <2 x i32> %arg0, i64 1 |
| %arg1.0 = extractelement <2 x i32> %arg1, i64 0 |
| %arg1.1 = extractelement <2 x i32> %arg1, i64 1 |
| %add.0 = call i32 @llvm.ssub.sat.i32(i32 %arg0.0, i32 %arg1.0) |
| %add.1 = call i32 @llvm.ssub.sat.i32(i32 %arg0.1, i32 %arg1.1) |
| %ins.0 = insertelement <2 x i32> poison, i32 %add.0, i64 0 |
| %ins.1 = insertelement <2 x i32> %ins.0, i32 %add.1, i64 1 |
| ret <2 x i32> %ins.1 |
| } |
| |
| ; Lane-wise llvm.uadd.sat.i16 on <3 x i16> operands (an odd lane count), rebuilt |
| ; with insertelement into a poison-based vector. |
| ; Expected output: the GFX7 assertions keep all three scalar calls as written. |
| ; The GFX8 assertions expect lanes 0 and 1 to be taken with shufflevector and fed |
| ; to one llvm.uadd.sat.v2i16 call, while lane 2 stays a scalar call whose result |
| ; is inserted at index 2 of the widened vector. |
| define <3 x i16> @uadd_sat_v3i16(<3 x i16> %arg0, <3 x i16> %arg1) { |
| ; GFX7-LABEL: @uadd_sat_v3i16( |
| ; GFX7-NEXT: bb: |
| ; GFX7-NEXT: [[ARG0_0:%.*]] = extractelement <3 x i16> [[ARG0:%.*]], i64 0 |
| ; GFX7-NEXT: [[ARG0_1:%.*]] = extractelement <3 x i16> [[ARG0]], i64 1 |
| ; GFX7-NEXT: [[ARG0_2:%.*]] = extractelement <3 x i16> [[ARG0]], i64 2 |
| ; GFX7-NEXT: [[ARG1_0:%.*]] = extractelement <3 x i16> [[ARG1:%.*]], i64 0 |
| ; GFX7-NEXT: [[ARG1_1:%.*]] = extractelement <3 x i16> [[ARG1]], i64 1 |
| ; GFX7-NEXT: [[ARG1_2:%.*]] = extractelement <3 x i16> [[ARG1]], i64 2 |
| ; GFX7-NEXT: [[ADD_0:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_0]], i16 [[ARG1_0]]) |
| ; GFX7-NEXT: [[ADD_1:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_1]], i16 [[ARG1_1]]) |
| ; GFX7-NEXT: [[ADD_2:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_2]], i16 [[ARG1_2]]) |
| ; GFX7-NEXT: [[INS_0:%.*]] = insertelement <3 x i16> poison, i16 [[ADD_0]], i64 0 |
| ; GFX7-NEXT: [[INS_1:%.*]] = insertelement <3 x i16> [[INS_0]], i16 [[ADD_1]], i64 1 |
| ; GFX7-NEXT: [[INS_2:%.*]] = insertelement <3 x i16> [[INS_1]], i16 [[ADD_2]], i64 2 |
| ; GFX7-NEXT: ret <3 x i16> [[INS_2]] |
| ; |
| ; GFX8-LABEL: @uadd_sat_v3i16( |
| ; GFX8-NEXT: bb: |
| ; GFX8-NEXT: [[ARG0_2:%.*]] = extractelement <3 x i16> [[ARG0:%.*]], i64 2 |
| ; GFX8-NEXT: [[ARG1_2:%.*]] = extractelement <3 x i16> [[ARG1:%.*]], i64 2 |
| ; GFX8-NEXT: [[TMP0:%.*]] = shufflevector <3 x i16> [[ARG0]], <3 x i16> undef, <2 x i32> <i32 0, i32 1> |
| ; GFX8-NEXT: [[TMP1:%.*]] = shufflevector <3 x i16> [[ARG1]], <3 x i16> undef, <2 x i32> <i32 0, i32 1> |
| ; GFX8-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) |
| ; GFX8-NEXT: [[ADD_2:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_2]], i16 [[ARG1_2]]) |
| ; GFX8-NEXT: [[INS_11:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <3 x i32> <i32 0, i32 1, i32 undef> |
| ; GFX8-NEXT: [[INS_2:%.*]] = insertelement <3 x i16> [[INS_11]], i16 [[ADD_2]], i64 2 |
| ; GFX8-NEXT: ret <3 x i16> [[INS_2]] |
| ; |
| bb: |
| %arg0.0 = extractelement <3 x i16> %arg0, i64 0 |
| %arg0.1 = extractelement <3 x i16> %arg0, i64 1 |
| %arg0.2 = extractelement <3 x i16> %arg0, i64 2 |
| %arg1.0 = extractelement <3 x i16> %arg1, i64 0 |
| %arg1.1 = extractelement <3 x i16> %arg1, i64 1 |
| %arg1.2 = extractelement <3 x i16> %arg1, i64 2 |
| %add.0 = call i16 @llvm.uadd.sat.i16(i16 %arg0.0, i16 %arg1.0) |
| %add.1 = call i16 @llvm.uadd.sat.i16(i16 %arg0.1, i16 %arg1.1) |
| %add.2 = call i16 @llvm.uadd.sat.i16(i16 %arg0.2, i16 %arg1.2) |
| %ins.0 = insertelement <3 x i16> poison, i16 %add.0, i64 0 |
| %ins.1 = insertelement <3 x i16> %ins.0, i16 %add.1, i64 1 |
| %ins.2 = insertelement <3 x i16> %ins.1, i16 %add.2, i64 2 |
| ret <3 x i16> %ins.2 |
| } |
| |
| ; Lane-wise llvm.uadd.sat.i16 on <4 x i16> operands, rebuilt with insertelement |
| ; into a poison-based vector. |
| ; Expected output: the GFX7 assertions keep all four scalar calls as written. |
| ; The GFX8 assertions expect two llvm.uadd.sat.v2i16 calls, one on lanes 0-1 and |
| ; one on lanes 2-3 (each pair taken with shufflevector), whose results are joined |
| ; by a final shufflevector into the <4 x i16> return value. |
| define <4 x i16> @uadd_sat_v4i16(<4 x i16> %arg0, <4 x i16> %arg1) { |
| ; GFX7-LABEL: @uadd_sat_v4i16( |
| ; GFX7-NEXT: bb: |
| ; GFX7-NEXT: [[ARG0_0:%.*]] = extractelement <4 x i16> [[ARG0:%.*]], i64 0 |
| ; GFX7-NEXT: [[ARG0_1:%.*]] = extractelement <4 x i16> [[ARG0]], i64 1 |
| ; GFX7-NEXT: [[ARG0_2:%.*]] = extractelement <4 x i16> [[ARG0]], i64 2 |
| ; GFX7-NEXT: [[ARG0_3:%.*]] = extractelement <4 x i16> [[ARG0]], i64 3 |
| ; GFX7-NEXT: [[ARG1_0:%.*]] = extractelement <4 x i16> [[ARG1:%.*]], i64 0 |
| ; GFX7-NEXT: [[ARG1_1:%.*]] = extractelement <4 x i16> [[ARG1]], i64 1 |
| ; GFX7-NEXT: [[ARG1_2:%.*]] = extractelement <4 x i16> [[ARG1]], i64 2 |
| ; GFX7-NEXT: [[ARG1_3:%.*]] = extractelement <4 x i16> [[ARG1]], i64 3 |
| ; GFX7-NEXT: [[ADD_0:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_0]], i16 [[ARG1_0]]) |
| ; GFX7-NEXT: [[ADD_1:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_1]], i16 [[ARG1_1]]) |
| ; GFX7-NEXT: [[ADD_2:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_2]], i16 [[ARG1_2]]) |
| ; GFX7-NEXT: [[ADD_3:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_3]], i16 [[ARG1_3]]) |
| ; GFX7-NEXT: [[INS_0:%.*]] = insertelement <4 x i16> poison, i16 [[ADD_0]], i64 0 |
| ; GFX7-NEXT: [[INS_1:%.*]] = insertelement <4 x i16> [[INS_0]], i16 [[ADD_1]], i64 1 |
| ; GFX7-NEXT: [[INS_2:%.*]] = insertelement <4 x i16> [[INS_1]], i16 [[ADD_2]], i64 2 |
| ; GFX7-NEXT: [[INS_3:%.*]] = insertelement <4 x i16> [[INS_2]], i16 [[ADD_3]], i64 3 |
| ; GFX7-NEXT: ret <4 x i16> [[INS_3]] |
| ; |
| ; GFX8-LABEL: @uadd_sat_v4i16( |
| ; GFX8-NEXT: bb: |
| ; GFX8-NEXT: [[TMP0:%.*]] = shufflevector <4 x i16> [[ARG0:%.*]], <4 x i16> undef, <2 x i32> <i32 0, i32 1> |
| ; GFX8-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[ARG1:%.*]], <4 x i16> undef, <2 x i32> <i32 0, i32 1> |
| ; GFX8-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) |
| ; GFX8-NEXT: [[TMP3:%.*]] = shufflevector <4 x i16> [[ARG0]], <4 x i16> undef, <2 x i32> <i32 2, i32 3> |
| ; GFX8-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[ARG1]], <4 x i16> undef, <2 x i32> <i32 2, i32 3> |
| ; GFX8-NEXT: [[TMP5:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP3]], <2 x i16> [[TMP4]]) |
| ; GFX8-NEXT: [[INS_32:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
| ; GFX8-NEXT: ret <4 x i16> [[INS_32]] |
| ; |
| bb: |
| %arg0.0 = extractelement <4 x i16> %arg0, i64 0 |
| %arg0.1 = extractelement <4 x i16> %arg0, i64 1 |
| %arg0.2 = extractelement <4 x i16> %arg0, i64 2 |
| %arg0.3 = extractelement <4 x i16> %arg0, i64 3 |
| %arg1.0 = extractelement <4 x i16> %arg1, i64 0 |
| %arg1.1 = extractelement <4 x i16> %arg1, i64 1 |
| %arg1.2 = extractelement <4 x i16> %arg1, i64 2 |
| %arg1.3 = extractelement <4 x i16> %arg1, i64 3 |
| %add.0 = call i16 @llvm.uadd.sat.i16(i16 %arg0.0, i16 %arg1.0) |
| %add.1 = call i16 @llvm.uadd.sat.i16(i16 %arg0.1, i16 %arg1.1) |
| %add.2 = call i16 @llvm.uadd.sat.i16(i16 %arg0.2, i16 %arg1.2) |
| %add.3 = call i16 @llvm.uadd.sat.i16(i16 %arg0.3, i16 %arg1.3) |
| %ins.0 = insertelement <4 x i16> poison, i16 %add.0, i64 0 |
| %ins.1 = insertelement <4 x i16> %ins.0, i16 %add.1, i64 1 |
| %ins.2 = insertelement <4 x i16> %ins.1, i16 %add.2, i64 2 |
| %ins.3 = insertelement <4 x i16> %ins.2, i16 %add.3, i64 3 |
| ret <4 x i16> %ins.3 |
| } |
| |
| ; Scalar i16 saturating add/sub intrinsics called by the i16-element tests. |
| declare i16 @llvm.uadd.sat.i16(i16, i16) #0 |
| declare i16 @llvm.usub.sat.i16(i16, i16) #0 |
| declare i16 @llvm.sadd.sat.i16(i16, i16) #0 |
| declare i16 @llvm.ssub.sat.i16(i16, i16) #0 |
| |
| ; Scalar i32 saturating add/sub intrinsics called by the <2 x i32> tests. |
| declare i32 @llvm.uadd.sat.i32(i32, i32) #0 |
| declare i32 @llvm.usub.sat.i32(i32, i32) #0 |
| declare i32 @llvm.sadd.sat.i32(i32, i32) #0 |
| declare i32 @llvm.ssub.sat.i32(i32, i32) #0 |
| |
| ; Attribute group shared by every intrinsic declaration above. |
| attributes #0 = { nounwind readnone speculatable willreturn } |