From: Konstantinos Margaritis Date: Wed, 21 Sep 2022 13:06:13 +0000 (+0000) Subject: First form of fully working SVP64 version X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=f588d073b576aa30c9cad76b2e77160439ad2da5;p=openpower-isa.git First form of fully working SVP64 version --- diff --git a/media/video/libvpx/variancefuncs_svp64.s b/media/video/libvpx/variancefuncs_svp64.s deleted file mode 100644 index 86541450..00000000 --- a/media/video/libvpx/variancefuncs_svp64.s +++ /dev/null @@ -1,39 +0,0 @@ -.set in, 3 -.set vin, 20 -.set sum, 6 -.set ctr, 9 - - - .machine libresoc - .file "variancefuncs_svp64.c" - .text - .abiversion 2 - .section ".text" - .align 2 - .globl vpx_get_mb_ss_svp64_real - .type vpx_get_mb_ss_svp64_real, @function -vpx_get_mb_ss_svp64_real: -.LFB0: - .cfi_startproc - addi 10, in ,-2 - li in, 0 - li sum, 0 - li ctr, 4 - mtctr ctr - setvl 0,0,64,0,1,1 # Set VL to 64 elements -.L2: - # Load 64 ints from (in) - sv.lha *vin, 0(in) - # equivalent to: for (i = 0; i < 64; i++) sum += in[i] * in[i]; - sv.maddld sum, *vin, *vin, sum - addi in, in, 16 - rldicl in,ctr,0,32 - bdnz .L2 - blr - .long 0 - .byte 0,0,0,0,0,0,0,0 - .cfi_endproc -.LFE0: - .size vpx_get_mb_ss_svp64_real,.-vpx_get_mb_ss_svp64_real - .ident "GCC: (Debian 8.3.0-6) 8.3.0" - .section .note.GNU-stack,"",@progbits diff --git a/media/video/libvpx/vpx_get_mb_ss_svp64_real.s b/media/video/libvpx/vpx_get_mb_ss_svp64_real.s new file mode 100644 index 00000000..c8cae746 --- /dev/null +++ b/media/video/libvpx/vpx_get_mb_ss_svp64_real.s @@ -0,0 +1,42 @@ +.set in, 3 +.set src, 10 +.set prod, 50 +.set sum, 4 +.set ctr, 9 + + + .machine libresoc + .file "variancefuncs_svp64.c" + .text + .abiversion 2 + .section ".text" + .align 2 + .globl vpx_get_mb_ss_svp64_real + .type vpx_get_mb_ss_svp64_real, @function +vpx_get_mb_ss_svp64_real: +.LFB0: + .cfi_startproc + li sum, 0 + li ctr, 8 + mtctr ctr + setvl 0,0,32,0,1,1 # Set VL to 32 elements +.L2: + # Load 32 halfwords from (in) + sv.lha *src, 
0(in) + # equivalent to: for (i = 0; i < 32; i++) prod[i] = src[i] * src[i]; + sv.mulld *prod, *src, *src + # equivalent to: for (i = 0; i < 32; i++) sum += prod[i]; + sv.add/mr sum, *prod, sum + addi in, in, 64 +# rldicl in,ctr,0,32 + bdnz .L2 + li in, 0 + addi in, sum, 0 + blr + .long 0 + .byte 0,0,0,0,0,0,0,0 + .cfi_endproc +.LFE0: + .size vpx_get_mb_ss_svp64_real,.-vpx_get_mb_ss_svp64_real + .ident "GCC: (Debian 8.3.0-6) 8.3.0" + .section .note.GNU-stack,"",@progbits