@@ -26,7 +26,7 @@ Except for m8n8k4, all other shapes are supported for row/col layout of A/B matr
2626
2727__global__ void mma_kernel_m8n8k4 (int *a, int *b, float *c) {
2828 // CHECK: {
29- // CHECK-NEXT: volatile float *d_mat_frag_ct1[8] = { &c[0], &c[1], &c[2], &c[3], &c[4], &c[5], &c[6], &c[7] };
29+ // CHECK-NEXT: volatile void *d_mat_frag_ct1[8] = { &c[0], &c[1], &c[2], &c[3], &c[4], &c[5], &c[6], &c[7] };
3030 // CHECK-NEXT: sycl::vec<uint32_t, 2> a_mat_frag_ct1(a[0], a[1]);
3131 // CHECK-NEXT: sycl::vec<uint32_t, 2> b_mat_frag_ct1(b[0], b[1]);
3232 // CHECK-NEXT: sycl::vec<float, 8> c_mat_frag_ct1(c[0], c[1], c[2], c[3], c[4], c[5], c[6], c[7]);
@@ -44,7 +44,7 @@ __global__ void mma_kernel_m8n8k4(int *a, int *b, float *c) {
4444
4545__global__ void mma_kernel_m8n8k16 (int *a, int *b, int *c, int *d) {
4646 // CHECK: {
47- // CHECK-NEXT: volatile int32_t *d_mat_frag_ct1[2] = { &d[0], &d[1] };
47+ // CHECK-NEXT: volatile void *d_mat_frag_ct1[2] = { &d[0], &d[1] };
4848 // CHECK-NEXT: sycl::vec<uint32_t, 1> a_mat_frag_ct1(a[0]);
4949 // CHECK-NEXT: sycl::vec<uint32_t, 1> b_mat_frag_ct1(b[0]);
5050 // CHECK-NEXT: sycl::vec<int32_t, 2> c_mat_frag_ct1(c[0], c[1]);
@@ -63,7 +63,7 @@ __global__ void mma_kernel_m8n8k16(int *a, int *b, int *c, int *d) {
6363
6464__global__ void mma_kernel_m16n8k8 (int *a, int *b, float *fc, float *fd) {
6565 // CHECK: {
66- // CHECK-NEXT: volatile float *d_mat_frag_ct1[4] = { &fd[0], &fd[1], &fd[2], &fd[3] };
66+ // CHECK-NEXT: volatile void *d_mat_frag_ct1[4] = { &fd[0], &fd[1], &fd[2], &fd[3] };
6767 // CHECK-NEXT: sycl::vec<uint32_t, 2> a_mat_frag_ct1(*(reinterpret_cast<int *>(&a[0])), *(reinterpret_cast<int *>(&a[1])));
6868 // CHECK-NEXT: sycl::vec<uint32_t, 1> b_mat_frag_ct1(*(reinterpret_cast<int *>(&b[0])));
6969 // CHECK-NEXT: sycl::vec<float, 4> c_mat_frag_ct1(fc[0], fc[1], fc[2], fc[3]);
@@ -81,7 +81,7 @@ __global__ void mma_kernel_m16n8k8(int *a, int *b, float *fc, float *fd) {
8181 " f" (fc[0 ]), " f" (fc[1 ]), " f" (fc[2 ]), " f" (fc[3 ]));
8282
8383 // CHECK: {
84- // CHECK-NEXT: volatile float *d_mat_frag_ct1[4] = { &fd[0], &fd[1], &fd[2], &fd[3] };
84+ // CHECK-NEXT: volatile void *d_mat_frag_ct1[4] = { &fd[0], &fd[1], &fd[2], &fd[3] };
8585 // CHECK-NEXT: sycl::vec<uint32_t, 2> a_mat_frag_ct1(*(reinterpret_cast<int *>(&a[0])), *(reinterpret_cast<int *>(&a[1])));
8686 // CHECK-NEXT: sycl::vec<uint32_t, 1> b_mat_frag_ct1(*(reinterpret_cast<int *>(&b[0])));
8787 // CHECK-NEXT: sycl::vec<float, 4> c_mat_frag_ct1(fc[0], fc[1], fc[2], fc[3]);
@@ -101,7 +101,7 @@ __global__ void mma_kernel_m16n8k8(int *a, int *b, float *fc, float *fd) {
101101
102102__global__ void mma_kernel_m16n8k16 (int *a, int *b, int *c, float *fc, int *d) {
103103 // CHECK: {
104- // CHECK-NEXT: volatile float *d_mat_frag_ct1[4] = { &fc[0], &fc[1], &fc[2], &fc[3] };
104+ // CHECK-NEXT: volatile void *d_mat_frag_ct1[4] = { &fc[0], &fc[1], &fc[2], &fc[3] };
105105 // CHECK-NEXT: sycl::vec<uint32_t, 4> a_mat_frag_ct1(a[0], a[1], a[2], a[3]);
106106 // CHECK-NEXT: sycl::vec<uint32_t, 2> b_mat_frag_ct1(b[0], b[1]);
107107 // CHECK-NEXT: sycl::vec<float, 4> c_mat_frag_ct1(fc[0], fc[1], fc[2], fc[3]);
@@ -117,7 +117,7 @@ __global__ void mma_kernel_m16n8k16(int *a, int *b, int *c, float *fc, int *d) {
117117 " r" (b[0 ]), " r" (b[1 ]));
118118
119119 // CHECK: {
120- // CHECK-NEXT: volatile int32_t *d_mat_frag_ct1[4] = { &d[0], &d[1], &d[2], &d[3] };
120+ // CHECK-NEXT: volatile void *d_mat_frag_ct1[4] = { &d[0], &d[1], &d[2], &d[3] };
121121 // CHECK-NEXT: sycl::vec<uint32_t, 2> a_mat_frag_ct1(a[0], a[1]);
122122 // CHECK-NEXT: sycl::vec<uint32_t, 1> b_mat_frag_ct1(b[0]);
123123 // CHECK-NEXT: sycl::vec<int32_t, 4> c_mat_frag_ct1(c[0], c[1], c[2], c[3]);
@@ -136,7 +136,7 @@ __global__ void mma_kernel_m16n8k16(int *a, int *b, int *c, float *fc, int *d) {
136136
137137__global__ void mma_kernel_m16n8k32 (int *a, int *b, int *c, int *d) {
138138 // CHECK: {
139- // CHECK-NEXT: volatile int32_t *d_mat_frag_ct1[4] = { &d[0], &d[1], &d[2], &d[3] };
139+ // CHECK-NEXT: volatile void *d_mat_frag_ct1[4] = { &d[0], &d[1], &d[2], &d[3] };
140140 // CHECK-NEXT: sycl::vec<uint32_t, 4> a_mat_frag_ct1(a[0], a[1], a[2], a[3]);
141141 // CHECK-NEXT: sycl::vec<uint32_t, 2> b_mat_frag_ct1(b[0], b[1]);
142142 // CHECK-NEXT: sycl::vec<int32_t, 4> c_mat_frag_ct1(c[0], c[1], c[2], c[3]);
0 commit comments