// RUN: %clang_cc1 -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s
// Matrix types under test. Clang lowers an RxC matrix_type to a flat
// <R*C x elt> LLVM vector, which the CHECK lines below match against.
typedef double dx5x5_t __attribute__((matrix_type(5, 5)));    // <25 x double>
typedef float fx2x3_t __attribute__((matrix_type(2, 3)));     // <6 x float>
typedef int ix9x3_t __attribute__((matrix_type(9, 3)));       // <27 x i32>
typedef unsigned long long ullx4x2_t __attribute__((matrix_type(4, 2))); // <8 x i64>
// Floating point matrix/scalar additions.
void add_matrix_matrix_double(dx5x5_t a, dx5x5_t b, dx5x5_t c) {
  // matrix + matrix of the same element type lowers to a single vector fadd.
  // CHECK-LABEL: define void @add_matrix_matrix_double(<25 x double> %a, <25 x double> %b, <25 x double> %c)
  // CHECK: [[B:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
  // CHECK-NEXT: [[C:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
  // CHECK-NEXT: [[RES:%.*]] = fadd <25 x double> [[B]], [[C]]
  // CHECK-NEXT: store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8
  a = b + c;
}
void add_matrix_matrix_float(fx2x3_t a, fx2x3_t b, fx2x3_t c) {
  // Same as above for a non-square float matrix (2x3 -> <6 x float>).
  // CHECK-LABEL: define void @add_matrix_matrix_float(<6 x float> %a, <6 x float> %b, <6 x float> %c)
  // CHECK: [[B:%.*]] = load <6 x float>, <6 x float>* {{.*}}, align 4
  // CHECK-NEXT: [[C:%.*]] = load <6 x float>, <6 x float>* {{.*}}, align 4
  // CHECK-NEXT: [[RES:%.*]] = fadd <6 x float> [[B]], [[C]]
  // CHECK-NEXT: store <6 x float> [[RES]], <6 x float>* {{.*}}, align 4
  a = b + c;
}
void add_matrix_scalar_double_float(dx5x5_t a, float vf) {
  // matrix + scalar of a narrower FP type: the scalar is extended to the
  // element type (fpext), splatted via insertelement + shufflevector, then
  // added with a vector fadd.
  // CHECK-LABEL: define void @add_matrix_scalar_double_float(<25 x double> %a, float %vf)
  // CHECK: [[MATRIX:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
  // CHECK-NEXT: [[SCALAR:%.*]] = load float, float* %vf.addr, align 4
  // CHECK-NEXT: [[SCALAR_EXT:%.*]] = fpext float [[SCALAR]] to double
  // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <25 x double> undef, double [[SCALAR_EXT]], i32 0
  // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> undef, <25 x i32> zeroinitializer
  // CHECK-NEXT: [[RES:%.*]] = fadd <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT: store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8
  a = a + vf;
}
void add_matrix_scalar_double_double(dx5x5_t a, double vd) {
  // matrix + scalar of the matching element type: splat with no conversion.
  // CHECK-LABEL: define void @add_matrix_scalar_double_double(<25 x double> %a, double %vd)
  // CHECK: [[MATRIX:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
  // CHECK-NEXT: [[SCALAR:%.*]] = load double, double* %vd.addr, align 8
  // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <25 x double> undef, double [[SCALAR]], i32 0
  // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> undef, <25 x i32> zeroinitializer
  // CHECK-NEXT: [[RES:%.*]] = fadd <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT: store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8
  a = a + vd;
}
void add_matrix_scalar_float_float(fx2x3_t b, float vf) {
  // float matrix + float scalar: splat with no conversion, vector fadd.
  // CHECK-LABEL: define void @add_matrix_scalar_float_float(<6 x float> %b, float %vf)
  // CHECK: [[MATRIX:%.*]] = load <6 x float>, <6 x float>* {{.*}}, align 4
  // CHECK-NEXT: [[SCALAR:%.*]] = load float, float* %vf.addr, align 4
  // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <6 x float> undef, float [[SCALAR]], i32 0
  // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> undef, <6 x i32> zeroinitializer
  // CHECK-NEXT: [[RES:%.*]] = fadd <6 x float> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT: store <6 x float> [[RES]], <6 x float>* {{.*}}, align 4
  b = b + vf;
}
void add_matrix_scalar_float_double(fx2x3_t b, double vd) {
  // float matrix + wider double scalar: the scalar is truncated (fptrunc)
  // to the element type before the splat — conversion goes toward the
  // matrix element type, not the usual arithmetic conversions.
  // CHECK-LABEL: define void @add_matrix_scalar_float_double(<6 x float> %b, double %vd)
  // CHECK: [[MATRIX:%.*]] = load <6 x float>, <6 x float>* {{.*}}, align 4
  // CHECK-NEXT: [[SCALAR:%.*]] = load double, double* %vd.addr, align 8
  // CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = fptrunc double [[SCALAR]] to float
  // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <6 x float> undef, float [[SCALAR_TRUNC]], i32 0
  // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> undef, <6 x i32> zeroinitializer
  // CHECK-NEXT: [[RES:%.*]] = fadd <6 x float> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT: store <6 x float> [[RES]], <6 x float>* {{.*}}, align 4
  b = b + vd;
}
// Integer matrix/scalar additions
void add_matrix_matrix_int(ix9x3_t a, ix9x3_t b, ix9x3_t c) {
  // Integer matrix + matrix: lowers to an integer vector add (not fadd).
  // CHECK-LABEL: define void @add_matrix_matrix_int(<27 x i32> %a, <27 x i32> %b, <27 x i32> %c)
  // CHECK: [[B:%.*]] = load <27 x i32>, <27 x i32>* {{.*}}, align 4
  // CHECK-NEXT: [[C:%.*]] = load <27 x i32>, <27 x i32>* {{.*}}, align 4
  // CHECK-NEXT: [[RES:%.*]] = add <27 x i32> [[B]], [[C]]
  // CHECK-NEXT: store <27 x i32> [[RES]], <27 x i32>* {{.*}}, align 4
  a = b + c;
}
void add_matrix_matrix_unsigned_long_long(ullx4x2_t a, ullx4x2_t b, ullx4x2_t c) {
  // Unsigned 64-bit matrix + matrix: plain vector add (signedness does not
  // change the add instruction).
  // CHECK-LABEL: define void @add_matrix_matrix_unsigned_long_long(<8 x i64> %a, <8 x i64> %b, <8 x i64> %c)
  // CHECK: [[B:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 8
  // CHECK-NEXT: [[C:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 8
  // CHECK-NEXT: [[RES:%.*]] = add <8 x i64> [[B]], [[C]]
  // CHECK-NEXT: store <8 x i64> [[RES]], <8 x i64>* {{.*}}, align 8
  a = b + c;
}
void add_matrix_scalar_int_short(ix9x3_t a, short vs) {
  // int matrix + narrower signed scalar: sign-extend the scalar to the
  // element type, splat, then vector add. The result is stored back to the
  // same alloca the matrix was loaded from ([[MAT_ADDR]]).
  // CHECK-LABEL: define void @add_matrix_scalar_int_short(<27 x i32> %a, i16 signext %vs)
  // CHECK: [[MATRIX:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4
  // CHECK-NEXT: [[SCALAR:%.*]] = load i16, i16* %vs.addr, align 2
  // CHECK-NEXT: [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i32
  // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> undef, i32 [[SCALAR_EXT]], i32 0
  // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> undef, <27 x i32> zeroinitializer
  // CHECK-NEXT: [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT: store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4
  a = a + vs;
}
void add_matrix_scalar_int_long_int(ix9x3_t a, long int vli) {
  // int matrix + wider signed scalar: the scalar is truncated to the
  // element type before the splat.
  // CHECK-LABEL: define void @add_matrix_scalar_int_long_int(<27 x i32> %a, i64 %vli)
  // CHECK: [[MATRIX:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4
  // CHECK-NEXT: [[SCALAR:%.*]] = load i64, i64* %vli.addr, align 8
  // CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32
  // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> undef, i32 [[SCALAR_TRUNC]], i32 0
  // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> undef, <27 x i32> zeroinitializer
  // CHECK-NEXT: [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT: store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4
  a = a + vli;
}
void add_matrix_scalar_int_unsigned_long_long(ix9x3_t a, unsigned long long int vulli) {
  // int matrix + wider unsigned scalar: also truncated to the element type
  // (same lowering as the signed wide-scalar case above).
  // CHECK-LABEL: define void @add_matrix_scalar_int_unsigned_long_long(<27 x i32> %a, i64 %vulli)
  // CHECK: [[MATRIX:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4
  // CHECK-NEXT: [[SCALAR:%.*]] = load i64, i64* %vulli.addr, align 8
  // CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32
  // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> undef, i32 [[SCALAR_TRUNC]], i32 0
  // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> undef, <27 x i32> zeroinitializer
  // CHECK-NEXT: [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT: store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4
  a = a + vulli;
}
void add_matrix_scalar_long_long_int_short(ullx4x2_t b, short vs) {
  // scalar + matrix (operands reversed): the scalar is loaded and extended
  // first, then the matrix, and the splat appears as the LEFT add operand —
  // the CHECK order pins that evaluation/operand order.
  // CHECK-LABEL: define void @add_matrix_scalar_long_long_int_short(<8 x i64> %b, i16 signext %vs)
  // CHECK: [[SCALAR:%.*]] = load i16, i16* %vs.addr, align 2
  // CHECK-NEXT: [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i64
  // CHECK-NEXT: [[MATRIX:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 8
  // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> undef, i64 [[SCALAR_EXT]], i32 0
  // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> undef, <8 x i32> zeroinitializer
  // CHECK-NEXT: [[RES:%.*]] = add <8 x i64> [[SCALAR_EMBED1]], [[MATRIX]]
  // CHECK-NEXT: store <8 x i64> [[RES]], <8 x i64>* {{.*}}, align 8
  b = vs + b;
}
void add_matrix_scalar_long_long_int_int(ullx4x2_t b, long int vli) {
  // scalar + matrix with matching 64-bit width: no conversion, splat on the
  // left of the add.
  // CHECK-LABEL: define void @add_matrix_scalar_long_long_int_int(<8 x i64> %b, i64 %vli)
  // CHECK: [[SCALAR:%.*]] = load i64, i64* %vli.addr, align 8
  // CHECK-NEXT: [[MATRIX:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 8
  // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> undef, i64 [[SCALAR]], i32 0
  // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> undef, <8 x i32> zeroinitializer
  // CHECK-NEXT: [[RES:%.*]] = add <8 x i64> [[SCALAR_EMBED1]], [[MATRIX]]
  // CHECK-NEXT: store <8 x i64> [[RES]], <8 x i64>* {{.*}}, align 8
  b = vli + b;
}
void add_matrix_scalar_long_long_int_unsigned_long_long(ullx4x2_t b, unsigned long long int vulli) {
  // Unsigned scalar + matching unsigned matrix: no conversion, splat on the
  // left of the add. The matrix-load operand below uses {{.*}} like every
  // sibling test instead of hard-coding the %0 SSA name, which is brittle.
  // CHECK-LABEL: define void @add_matrix_scalar_long_long_int_unsigned_long_long
  // CHECK: [[SCALAR:%.*]] = load i64, i64* %vulli.addr, align 8
  // CHECK-NEXT: [[MATRIX:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 8
  // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> undef, i64 [[SCALAR]], i32 0
  // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> undef, <8 x i32> zeroinitializer
  // CHECK-NEXT: [[RES:%.*]] = add <8 x i64> [[SCALAR_EMBED1]], [[MATRIX]]
  // CHECK-NEXT: store <8 x i64> [[RES]], <8 x i64>* {{.*}}, align 8
  b = vulli + b;
}