tree src_array = build1 (VIEW_CONVERT_EXPR, array_type, src);
for (unsigned i = 0; i < 4; i++)
{
+ unsigned index = WORDS_BIG_ENDIAN ? i : 3 - i;
tree ref = build4 (ARRAY_REF, unsigned_V16QI_type_node, src_array,
build_int_cst (size_type_node, i),
NULL_TREE, NULL_TREE);
tree dst = build2 (MEM_REF, unsigned_V16QI_type_node, dst_base,
- build_int_cst (dst_type, i * 16));
+ build_int_cst (dst_type, index * 16));
gimplify_assign (dst, ref, &new_seq);
}
pop_gimplify_context (NULL);
/* SAVE_ACC (ACC, ldc, J): unpack accumulator ACC into `result' with
   __builtin_mma_disassemble_acc, then accumulate (+=) one entry of
   `result' into each of four v4sf_t-typed locations of CO, at
   CO[0*ldc+J], CO[1*ldc+J], CO[2*ldc+J] and CO[3*ldc+J].
   The added lines pair result[i] with row i; the removed lines they
   replace paired result[3-i] with row i.
   `result', `rowC' and `CO' are not macro parameters and must be in
   scope where the macro is expanded.  The expansion is a plain
   sequence of statements (no do/while wrapper), and `ldc' and `J' are
   substituted without parentheses.  */
#define SAVE_ACC(ACC, ldc, J) \
__builtin_mma_disassemble_acc (result, ACC); \
rowC = (v4sf_t *) &CO[0*ldc+J]; \
- rowC[0] += result[3] ; \
+ rowC[0] += result[0]; \
rowC = (v4sf_t *) &CO[1*ldc+J]; \
- rowC[0] += result[2] ; \
+ rowC[0] += result[1]; \
rowC = (v4sf_t *) &CO[2*ldc+J]; \
- rowC[0] += result[1] ; \
+ rowC[0] += result[2]; \
rowC = (v4sf_t *) &CO[3*ldc+J]; \
- rowC[0] += result[0] ;
+ rowC[0] += result[3];
void
MMA (int m, int n, int k, double *A, double *B, double *C)
/* SAVE_ACC (ACC, ldc, J): unpack accumulator ACC into `result' with
   __builtin_mma_disassemble_acc, then accumulate (+=) one entry of
   `result' into each of four v4sf_t-typed locations of CO, at
   CO[0*ldc+J] through CO[3*ldc+J] (rows 0..3, stepping by ldc).
   The added lines pair result[i] with row i; the removed lines they
   replace paired result[3-i] with row i.
   `result', `rowC' and `CO' are not macro parameters and must be in
   scope where the macro is expanded.  The expansion is a plain
   sequence of statements (no do/while wrapper), and `ldc' and `J' are
   substituted without parentheses.  */
#define SAVE_ACC(ACC, ldc,J) \
__builtin_mma_disassemble_acc (result, ACC); \
rowC = (v4sf_t *) &CO[0*ldc+J]; \
- rowC[0] += result[3] ; \
+ rowC[0] += result[0]; \
rowC = (v4sf_t *) &CO[1*ldc+J]; \
- rowC[0] += result[2] ; \
+ rowC[0] += result[1]; \
rowC = (v4sf_t *) &CO[2*ldc+J]; \
- rowC[0] += result[1] ; \
+ rowC[0] += result[2]; \
rowC = (v4sf_t *) &CO[3*ldc+J]; \
- rowC[0] += result[0] ;
+ rowC[0] += result[3];
/* SAVE_ACC1 (ACC, ldc, J): same shape as a four-row accumulator store,
   but targeting rows 4..7 of CO: unpack accumulator ACC into `result'
   with __builtin_mma_disassemble_acc, then accumulate (+=) one entry
   of `result' into each of the v4sf_t-typed locations CO[4*ldc+J],
   CO[5*ldc+J], CO[6*ldc+J] and CO[7*ldc+J].
   The added lines pair result[i] with row 4+i; the removed lines they
   replace paired result[3-i] with row 4+i.
   `result', `rowC' and `CO' are not macro parameters and must be in
   scope where the macro is expanded.  The expansion is a plain
   sequence of statements (no do/while wrapper), and `ldc' and `J' are
   substituted without parentheses.  */
#define SAVE_ACC1(ACC,ldc, J) \
__builtin_mma_disassemble_acc (result, ACC); \
rowC = (v4sf_t *) &CO[4* ldc+J]; \
- rowC[0] += result[3] ; \
+ rowC[0] += result[0]; \
rowC = (v4sf_t *) &CO[5*ldc+J]; \
- rowC[0] += result[2] ; \
+ rowC[0] += result[1]; \
rowC = (v4sf_t *) &CO[6*ldc+J]; \
- rowC[0] += result[1] ; \
+ rowC[0] += result[2]; \
rowC = (v4sf_t *) &CO[7*ldc+J]; \
- rowC[0] += result[0] ;
+ rowC[0] += result[3];
void
MMA (int m, int n, int k, float *A, float *B, float *C)
{