pysvp64db: fix traversal
[openpower-isa.git] / media / video / libvpx / vpx_get4x4sse_cs_svp64_real.s
# Symbolic GPR numbers used by vpx_get4x4sse_cs_svp64_real.
# src_ptr..sum and ctr name single (scalar) registers. src, ref, diff and prod
# name the FIRST register of a vector: the code runs them with VL=16, so each
# one spans 16 consecutive GPRs and the bases are spaced 16 apart.
1 .set src_ptr, 3 # r3: address of the current source row
2 .set src_stride, 4 # r4: source row stride (doubled on entry to become a byte count)
3 .set ref_ptr, 5 # r5: address of the current reference row
4 .set ref_stride, 6 # r6: reference row stride (doubled on entry to become a byte count)
5 .set sum, 7 # r7: scalar accumulator for the sum of squares
6 .set src, 10 # r10..r25: the 16 source samples, one row per 4 registers
7 .set ref, 26 # r26..r41: the 16 reference samples, one row per 4 registers
8 .set diff, 42 # r42..r57: per-sample differences
9 .set prod, 58 # r58..r73: per-sample squared differences
10 .set ctr, 9 # r9: defined but not referenced by any instruction in this file
11
# Assembler setup and symbol declaration for the function that follows.
12 .machine libresoc # target CPU selection; presumably what enables the setvl / sv.* mnemonics
13 .file "vpx_get4x4sse_cs_svp64_real.c" # source file name recorded in the object
14 .abiversion 2 # object follows the ELFv2 ABI
15 .section ".text"
16 .align 2 # power-of-two alignment: 2^2 = 4 bytes (one instruction word)
17 .globl vpx_get4x4sse_cs_svp64_real # make the entry point visible to the linker
18 .type vpx_get4x4sse_cs_svp64_real, @function
# vpx_get4x4sse_cs_svp64_real
#
# Sum of squared differences between a 4x4 source block and a 4x4 reference
# block of 16-bit samples, computed with SVP64 vector instructions.
#
# In:
#   r3 = src_ptr     address of the first source row
#   r4 = src_stride  source row stride in 16-bit elements (doubled below)
#   r5 = ref_ptr     address of the first reference row
#   r6 = ref_stride  reference row stride in 16-bit elements (doubled below)
# Out:
#   r3 = sum over all 16 samples of (src - ref)^2
# Writes: r3..r7 and the vector ranges r10..r73 (src, ref, diff, prod).
#   Uses no stack and makes no calls.
#
# NOTE(review): r10..r73 includes r13 and r14..r31. Under the ELFv2 ABI
#   selected by ".abiversion 2", r14..r31 are callee-saved and r13 is the
#   thread pointer; nothing here saves or restores them. Confirm that the
#   caller / test harness tolerates this.
# NOTE(review): lha sign-extends each halfword, so samples >= 0x8000 would be
#   read as negative values. Confirm the expected input range.
19 vpx_get4x4sse_cs_svp64_real:
20 .LFB0:
21 .cfi_startproc
22 # Setup: clear the accumulator and turn the element strides into byte strides
23 li sum, 0 # sum = 0
24 sldi src_stride, src_stride, 1 # src_stride *= 2: 16-bit elements -> bytes
25 sldi ref_stride, ref_stride, 1 # ref_stride *= 2: 16-bit elements -> bytes
26 # Load the 4x4 blocks row by row: 4 samples per load, pointer advanced by the
# byte stride between rows. Row k lands in vector elements 4k..4k+3.
27 setvl 0,0,4,0,1,1 # VL = 4: each sv.lha below loads one 4-sample row
28 sv.lha *src, 0(src_ptr) # src[0..3] = row 0: 4 sign-extended halfwords from (src_ptr)
29 add src_ptr, src_ptr, src_stride # step to source row 1
30 sv.lha *src + 4, 0(src_ptr) # src[4..7] = row 1
31 add src_ptr, src_ptr, src_stride # step to source row 2
32 sv.lha *src + 8, 0(src_ptr) # src[8..11] = row 2
33 add src_ptr, src_ptr, src_stride # step to source row 3
34 sv.lha *src + 12, 0(src_ptr) # src[12..15] = row 3
35 sv.lha *ref, 0(ref_ptr) # ref[0..3] = row 0: 4 sign-extended halfwords from (ref_ptr)
36 add ref_ptr, ref_ptr, ref_stride # step to reference row 1
37 sv.lha *ref + 4, 0(ref_ptr) # ref[4..7] = row 1
38 add ref_ptr, ref_ptr, ref_stride # step to reference row 2
39 sv.lha *ref + 8, 0(ref_ptr) # ref[8..11] = row 2
40 add ref_ptr, ref_ptr, ref_stride # step to reference row 3
41 sv.lha *ref + 12, 0(ref_ptr) # ref[12..15] = row 3
42
43 # The 16 samples of each block now sit in consecutive registers, so the
# arithmetic below runs once over all of them with VL = 16.
44 setvl 0,0,16,0,1,1
45 # equivalent to: for (i = 0; i < 16; i++) diff[i] = ref[i] - src[i];
# (subf computes RB - RA; the sign is irrelevant because diff is squared next)
46 sv.subf *diff, *src, *ref
47 # equivalent to: for (i = 0; i < 16; i++) prod[i] = diff[i] * diff[i];
48 sv.mulld *prod, *diff, *diff
49 # equivalent to: for (i = 0; i < 16; i++) sum += prod[i];
# (/mr is map-reduce mode: the scalar sum is both an input and the destination)
50 sv.add/mr sum, *prod, sum
51 mr 3, sum # return value: r3 = sum
52 blr
53 .long 0 # 12 zero bytes follow the return and are never executed;
54 .byte 0,0,0,0,0,0,0,0 # presumably a compiler-emitted function trailer - TODO confirm
55 .cfi_endproc
56 .LFE0:
57 .size vpx_get4x4sse_cs_svp64_real,.-vpx_get4x4sse_cs_svp64_real
# Object-file metadata; neither directive emits executable code.
58 .ident "GCC: (Debian 8.3.0-6) 8.3.0" # toolchain identification string stored in the object
59 .section .note.GNU-stack,"",@progbits # empty marker section: no executable stack is requested