# Page header at time of capture: "pysvp64db: fix traversal"
# Origin: openpower-isa.git / media / video / libvpx / variance_svp64_real.s
#-----------------------------------------------------------------------
# Register aliases used by variance_svp64_real.
# Each alias is a GPR number; the values and names below are unchanged.
#-----------------------------------------------------------------------

# Incoming arguments, in argument-register order (r3..r10):
#   src_ptr / ref_ptr        start of the current source / reference row
#   src_stride / ref_stride  row strides, given in 16-bit elements
#   width / height           block dimensions
#   sse_ptr / sum_ptr        where the two 64-bit results are stored
.set src_ptr, 3
.set src_stride, 4
.set ref_ptr, 5
.set ref_stride, 6
.set width, 7
.set height, 8
.set sse_ptr, 9
.set sum_ptr, 10

# Scalar accumulators:
#   sum  running sum of the differences
#   sse  running sum of the squared differences
.set sum, 11
.set sse, 12

# Loop bookkeeping:
#   ctr      scratch GPR for the inner-loop trip count
#            NOTE(review): r13 is the reserved thread pointer in the
#            64-bit ELFv2 ABI - avoid writing it.
#   src_col  cursor into the current source row
#   ref_col  cursor into the current reference row
#   row      outer-loop row counter
.set ctr, 11 + 2
.set src_col, 14
.set ref_col, 15
.set row, 16

# Vector register groups. Each alias names the first of four consecutive
# GPRs (the routine runs with VL = 4):
#   src   loaded source elements
#   ref   loaded reference elements
#   diff  src[i] - ref[i]
#   prod  diff[i] * diff[i]
.set src, 20
.set ref, 24
.set diff, 28
.set prod, 32
19
.machine libresoc
.file "variance_svp64_real.c"
.abiversion 2
.section ".text"
.align 2
.globl variance_svp64_real
.type variance_svp64_real, @function

#-----------------------------------------------------------------------
# variance_svp64_real
#
# Walks a width x height block of signed 16-bit elements in two buffers
# and accumulates
#       sum = SUM (src[i] - ref[i])
#       sse = SUM (src[i] - ref[i])^2
# four elements at a time (SVP64, VL = 4).
#
# In:   src_ptr, ref_ptr        first row of each buffer
#       src_stride, ref_stride  row strides in 16-bit elements
#       width                   columns; only whole groups of 4 are
#                               processed, a remainder is ignored
#       height                  rows
#       sse_ptr, sum_ptr        destinations for the 64-bit results
# Out:  doubleword at 0(sum_ptr) = sum, doubleword at 0(sse_ptr) = sse.
#       If width < 4 or height <= 0 nothing is read and both results
#       are stored as 0.
# Clobbers: src_ptr, ref_ptr, src_stride, ref_stride, width (modified in
#       place), sum, sse, src_col, ref_col, row, the four 4-register
#       groups at src/ref/diff/prod, CTR, cr1, and VL.
#
# NOTE(review): src_col, ref_col, row and the vector groups live in
# r14-r16 / r20-r35. r14-r31 are non-volatile in the ELFv2 ABI and are
# NOT saved here; a caller following that ABI must preserve them itself.
#-----------------------------------------------------------------------
variance_svp64_real:
.LFB0:
        .cfi_startproc
        li      sum, 0                          # sum = 0
        li      sse, 0                          # sse = 0
        li      row, 0                          # r = 0
        sldi    src_stride, src_stride, 1       # strides count 16-bit elements;
        sldi    ref_stride, ref_stride, 1       # scale them to byte offsets
        srdi    width, width, 2                 # width = number of 4-element groups
        setvl   0,0,4,0,1,1                     # Set VL to 4 elements

        # A zero trip count must never reach the loops: bdnz with CTR = 0
        # wraps around instead of exiting, and the row loop only exits on
        # row == height. Store the zeroed accumulators and return instead.
        cmpwi   cr1, height, 0
        ble     cr1, .L3                        # no rows to process
        cmpdi   cr1, width, 0
        beq     cr1, .L3                        # fewer than 4 columns

.L1:                                            # outer loop: for (r = 0; r < h; r++)
        mr      src_col, src_ptr                # per-row cursors start at the row base
        mr      ref_col, ref_ptr
        mtctr   width                           # CTR = width/4 groups in this row
                                                # (loaded directly; r13 is left untouched)
.L2:                                            # inner loop: for (c = 0; c < w; c += 4)
        # Load 4 consecutive 16-bit elements from each buffer
        sv.lha  *src, 0(src_col)                # src[0..3]
        sv.lha  *ref, 0(ref_col)                # ref[0..3]

        # equivalent to: for (i = 0; i < 4; i++) diff[i] = src[i] - ref[i];
        sv.subf *diff, *ref, *src
        # equivalent to: for (i = 0; i < 4; i++) prod[i] = diff[i] * diff[i];
        sv.mulld *prod, *diff, *diff
        # equivalent to: for (i = 0; i < 4; i++) sum += diff[i];
        sv.add/mr sum, *diff, sum
        # equivalent to: for (i = 0; i < 4; i++) sse += diff[i]*diff[i];
        sv.add/mr sse, *prod, sse

        addi    src_col, src_col, 8             # advance 4 elements * 2 bytes
        addi    ref_col, ref_col, 8
        bdnz    .L2                             # next group until CTR reaches zero

        add     src_ptr, src_ptr, src_stride    # step both buffers to the next row
        add     ref_ptr, ref_ptr, ref_stride
        addi    row, row, 1                     # r++
        cmpw    cr1, row, height                # r == h ?
        bne     cr1, .L1                        # no: process the next row

.L3:                                            # common exit: publish the results
        std     sum, 0(sum_ptr)                 # *sum_ptr = sum
        std     sse, 0(sse_ptr)                 # *sse_ptr = sse
        blr
        # Trailer words kept verbatim from the original listing.
        .long 0
        .byte 0,0,0,0,0,3,0,0
        .cfi_endproc
.LFE0:
        .size variance_svp64_real,.-variance_svp64_real
        .ident "GCC: (Debian 8.3.0-6) 8.3.0"
        .section .note.GNU-stack,"",@progbits