From f588d073b576aa30c9cad76b2e77160439ad2da5 Mon Sep 17 00:00:00 2001 From: Konstantinos Margaritis Date: Wed, 21 Sep 2022 13:06:13 +0000 Subject: [PATCH] First form of fully working SVP64 version --- ...ncs_svp64.s => vpx_get_mb_ss_svp64_real.s} | 27 ++++++++++--------- 1 file changed, 15 insertions(+), 12 deletions(-) rename media/video/libvpx/{variancefuncs_svp64.s => vpx_get_mb_ss_svp64_real.s} (56%) diff --git a/media/video/libvpx/variancefuncs_svp64.s b/media/video/libvpx/vpx_get_mb_ss_svp64_real.s similarity index 56% rename from media/video/libvpx/variancefuncs_svp64.s rename to media/video/libvpx/vpx_get_mb_ss_svp64_real.s index 86541450..c8cae746 100644 --- a/media/video/libvpx/variancefuncs_svp64.s +++ b/media/video/libvpx/vpx_get_mb_ss_svp64_real.s @@ -1,6 +1,7 @@ .set in, 3 -.set vin, 20 -.set sum, 6 +.set src, 10 +.set prod, 50 +.set sum, 4 .set ctr, 9 @@ -15,20 +16,22 @@ vpx_get_mb_ss_svp64_real: .LFB0: .cfi_startproc - addi 10, in ,-2 - li in, 0 li sum, 0 - li ctr, 4 + li ctr, 8 mtctr ctr - setvl 0,0,64,0,1,1 # Set VL to 64 elements + setvl 0,0,32,0,1,1 # Set VL to 32 elements .L2: - # Load 64 ints from (in) - sv.lha *vin, 0(in) - # equivalent to: for (i = 0; i < 64; i++) sum += in[i] * in[i]; - sv.maddld sum, *vin, *vin, sum - addi in, in, 16 - rldicl in,ctr,0,32 + # Load 32 sign-extended halfwords (64 bytes) from (in) + sv.lha *src, 0(in) + # equivalent to: for (i = 0; i < 32; i++) prod[i] = src[i] * src[i]; + sv.mulld *prod, *src, *src + # equivalent to: for (i = 0; i < 32; i++) sum += prod[i]; + sv.add/mr sum, *prod, sum + addi in, in, 64 +# rldicl in,ctr,0,32 bdnz .L2 + li in, 0 # NOTE(review): dead store, r3 is overwritten by the following addi + addi in, sum, 0 blr .long 0 .byte 0,0,0,0,0,0,0,0 -- 2.30.2