pysvp64db: fix traversal
[openpower-isa.git] / media / video / libvpx / vpx_get_mb_ss_svp64_real.s
# Symbolic GPR numbers used by vpx_get_mb_ss_svp64_real below.
1 .set in, 3 # r3: input pointer on entry; also receives the result before blr
2 .set src, 10 # first GPR of the vector filled by sv.lha
3 .set prod, 50 # only referenced by the commented-out sv.mulld/sv.add pair
4 .set sum, 4 # r4: scalar accumulator
5 .set ctr, 9 # r9: scratch GPR holding the iteration count moved into CTR
6
7
8 .machine libresoc
9 .file "variancefuncs_svp64.c"
10 .text
11 .abiversion 2
12 .section ".text"
13 .align 2
# Export the entry point and mark it as a function symbol.
14 .globl vpx_get_mb_ss_svp64_real
15 .type vpx_get_mb_ss_svp64_real, @function
# vpx_get_mb_ss_svp64_real
#   Accumulates src[i] * src[i] over 8 iterations of 32 halfword elements
#   (256 elements in total) read starting at the address held in r3 (in),
#   then copies the accumulated total into r3 before returning.
#   Registers written: r3 (in), r4 (sum), r9 (ctr), CTR, VL, and the GPR
#   vector starting at r10 (src).
#   NOTE(review): with VL = 32 the src vector presumably spans r10..r41,
#   which would overlap r14-r31; nothing here saves or restores those
#   registers -- confirm that callers tolerate this.
16 vpx_get_mb_ss_svp64_real:
17 .LFB0:
18 .cfi_startproc
19 # Initialise the accumulator, the loop counter and the vector length
20 li sum, 0 # sum = 0
21 li ctr, 8 # 8 iterations of 32 elements each
22 mtctr ctr # CTR = 8, decremented by bdnz at the bottom of the loop
23 setvl 0,0,32,0,1,1 # Set VL to 32 elements
24 .L2:
25 sv.lha *src, 0(in) # Load 32 halfwords (lha sign-extends each) from (in)
26 # The single sv.maddld/mr below replaces the sv.mulld + sv.add/mr pair
27 # that is kept here, commented out, for reference.
27 # First step: for (i = 0; i < 32; i++) prod[i] = src[i] * src[i];
28 #sv.mulld *prod, *src, *src
29 # Second step: for (i = 0; i < 32; i++) sum += prod[i];
30 #sv.add/mr sum, *prod, sum
31 sv.maddld/mr sum, *src, *src, sum
32 addi in, in, 64 # Advance (in) by 64 bytes = 32 halfwords just consumed
33 bdnz .L2 # Decrement CTR; loop while it is non-zero
34 mr in, sum # Return value: r3 = sum
# NOTE(review): the whole 64-bit accumulator is returned; if the C
# prototype returns a 32-bit unsigned value, confirm whether the upper
# bits need clearing.
35 blr
# Data words placed after the return; never executed.
# NOTE(review): looks like a compiler-emitted traceback table -- confirm.
36 .long 0
37 .byte 0,0,0,0,0,0,0,0
38 .cfi_endproc
39 .LFE0:
40 .size vpx_get_mb_ss_svp64_real,.-vpx_get_mb_ss_svp64_real
# Toolchain identification string recorded in the object file.
41 .ident "GCC: (Debian 8.3.0-6) 8.3.0"
# Empty .note.GNU-stack section with no flags; by GNU toolchain convention
# this marks the object as not requiring an executable stack.
42 .section .note.GNU-stack,"",@progbits