@@ -48,12 +48,17 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
4848 BLASLONG i , js ;
4949 BLASLONG X ;
5050
51- svint64_t index = svindex_s64 (0LL , lda );
52-
53- FLOAT * ao ;
5451 js = 0 ;
52+ FLOAT * ao ;
53+ #ifdef DOUBLE
54+ svint64_t index = svindex_s64 (0LL , lda );
5555 svbool_t pn = svwhilelt_b64 (js , n );
5656 int n_active = svcntp_b64 (svptrue_b64 (), pn );
57+ #else
58+ svint32_t index = svindex_s32 (0 , lda );
59+ svbool_t pn = svwhilelt_b32 (js , n );
60+ int n_active = svcntp_b32 (svptrue_b32 (), pn );
61+ #endif
5762 do
5863 {
5964 X = posX ;
@@ -68,7 +73,11 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
6873 do
6974 {
7075 if (X > posY ) {
76+ #ifdef DOUBLE
7177 svfloat64_t aj_vec = svld1_gather_index (pn , ao , index );
78+ #else
79+ svfloat32_t aj_vec = svld1_gather_index (pn , ao , index );
80+ #endif
7281 svst1 (pn , b , aj_vec );
7382 ao ++ ;
7483 b += n_active ;
@@ -113,9 +122,15 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
113122
114123 posY += n_active ;
115124 js += n_active ;
125+ #ifdef DOUBLE
116126 pn = svwhilelt_b64 (js , n );
117127 n_active = svcntp_b64 (svptrue_b64 (), pn );
118128 } while (svptest_any (svptrue_b64 (), pn ));
129+ #else
130+ pn = svwhilelt_b32 (js , n );
131+ n_active = svcntp_b32 (svptrue_b32 (), pn );
132+ } while (svptest_any (svptrue_b32 (), pn ));
133+ #endif
119134
120135 return 0 ;
121136}
0 commit comments