9 .file "variancefuncs_svp64.c"
# vpx_get_mb_ss_svp64_real: SVP64 (Power ISA) loop that accumulates the sum
# of squares of signed 16-bit elements read from the buffer addressed by `in`.
#   - 8 loop iterations (CTR) x 32 elements (VL) = 256 elements, 512 bytes.
#   - The accumulated value is copied from `sum` into `in` after the loop.
# NOTE(review): `sum`, `ctr`, `in`, `src` and `prod` are symbolic operand
# names defined outside this excerpt (presumably register aliases) - confirm.
# NOTE(review): the `.L2` loop label and the return instruction are not
# visible in this listing excerpt (listing lines 24 and 35-39 are absent);
# verify them against the full source before changing the loop.
14 .globl vpx_get_mb_ss_svp64_real
15 .type vpx_get_mb_ss_svp64_real, @function
16 vpx_get_mb_ss_svp64_real:
20 li sum, 0 # Clear the running sum-of-squares accumulator
21 li ctr, 8 # 8 iterations of 32 elements each (256 elements in total)
22 mtctr ctr # Copy the iteration count into CTR, which bdnz decrements
23 setvl 0,0,32,0,1,1 # Set vector length (VL) to 32 elements for the sv.* ops below
# NOTE(review): bdnz targets .L2; presumably the label sits here, at the load.
25 sv.lha *src, 0(in) # Load 32 sign-extended 16-bit halfwords from (in) into vector src
26 # The single sv.maddld/mr below replaces the commented-out sv.mulld + sv.add/mr pair:
27 # sv.mulld is equivalent to: for (i = 0; i < 32; i++) prod[i] = src[i] * src[i];
28 #sv.mulld *prod, *src, *src
29 # sv.add/mr is equivalent to: for (i = 0; i < 32; i++) sum += prod[i];
30 #sv.add/mr sum, *prod, sum
31 sv.maddld/mr sum, *src, *src, sum # Fused form: for (i = 0; i < 32; i++) sum += src[i] * src[i];
32 addi in, in, 64 # Advance (in) by 64 bytes = 32 elements * 2 bytes each
33 bdnz .L2 # Decrement CTR and branch back to .L2 while it is non-zero
34 mr in, sum # Copy sum into `in` (r3 per the original comment) as the result
40 .size vpx_get_mb_ss_svp64_real,.-vpx_get_mb_ss_svp64_real
41 .ident "GCC: (Debian 8.3.0-6) 8.3.0" # Toolchain identification string recorded in the object file
42 .section .note.GNU-stack,"",@progbits # Empty note section: tells the GNU linker no executable stack is needed