First form of fully working SVP64 version
author: Konstantinos Margaritis <konstantinos.margaritis@vectorcamp.gr>
Wed, 21 Sep 2022 13:06:13 +0000 (13:06 +0000)
committer: Konstantinos Margaritis <konstantinos.margaritis@vectorcamp.gr>
Wed, 21 Sep 2022 13:06:13 +0000 (13:06 +0000)
media/video/libvpx/variancefuncs_svp64.s [deleted file]
media/video/libvpx/vpx_get_mb_ss_svp64_real.s [new file with mode: 0644]

diff --git a/media/video/libvpx/variancefuncs_svp64.s b/media/video/libvpx/variancefuncs_svp64.s
deleted file mode 100644 (file)
index 8654145..0000000
+++ /dev/null
@@ -1,39 +0,0 @@
-.set in, 3                             # r3: base address register used by the loads
-.set vin, 20                           # first register of the vector filled by sv.lha
-.set sum, 6                            # r6: running accumulator
-.set ctr, 9                            # r9: scratch holding the loop count for mtctr
-# vpx_get_mb_ss_svp64_real: loop accumulating sum += in[i] * in[i] with SVP64 vector ops.
-# NOTE(review): sum (r6) is never copied to r3 before blr - confirm how the result is read.
-       .machine libresoc
-       .file   "variancefuncs_svp64.c"
-       .text
-       .abiversion 2
-       .section        ".text"
-       .align 2
-       .globl vpx_get_mb_ss_svp64_real
-       .type   vpx_get_mb_ss_svp64_real, @function
-vpx_get_mb_ss_svp64_real:
-.LFB0:
-       .cfi_startproc
-       addi 10, in ,-2                         # r10 = in - 2; r10 is not read again below
-       li in, 0                                # NOTE(review): zeroes the load address before the first load
-       li sum, 0                               # sum = 0
-       li ctr, 4                               # 4 loop iterations
-       mtctr ctr                               # CTR = 4, counted down by bdnz
-       setvl 0,0,64,0,1,1                      # Set VL to 64 elements
-.L2:
-       # Load 64 sign-extended halfwords from (in)
-       sv.lha          *vin, 0(in)
-       # equivalent to: for (i = 0; i < 64; i++) sum += in[i] * in[i];
-       sv.maddld       sum, *vin, *vin, sum
-       addi in, in, 16                         # advance the address by 16 bytes
-       rldicl in,ctr,0,32                      # NOTE(review): overwrites in with the low 32 bits of r9
-       bdnz .L2                                # decrement CTR, loop while it is non-zero
-       blr
-       .long 0
-       .byte 0,0,0,0,0,0,0,0
-       .cfi_endproc
-.LFE0:
-       .size   vpx_get_mb_ss_svp64_real,.-vpx_get_mb_ss_svp64_real
-       .ident  "GCC: (Debian 8.3.0-6) 8.3.0"
-       .section        .note.GNU-stack,"",@progbits
diff --git a/media/video/libvpx/vpx_get_mb_ss_svp64_real.s b/media/video/libvpx/vpx_get_mb_ss_svp64_real.s
new file mode 100644 (file)
index 0000000..c8cae74
--- /dev/null
@@ -0,0 +1,42 @@
+.set in, 3                             # r3: source address on entry, result on return
+.set src, 10                           # first register of the loaded vector; NOTE(review): 32 regs from r10 presumably cover r13-r31 - confirm ABI save rules
+.set prod, 50                          # first register of the vector of squares
+.set sum, 4                            # r4: scalar accumulator
+.set ctr, 9                            # r9: scratch holding the loop count for mtctr
+# vpx_get_mb_ss_svp64_real: returns in r3 the sum of squares of the 256 halfwords at (in).
+# 8 loop iterations (CTR) of 32 elements (VL) each; in advances 64 bytes per iteration.
+       .machine libresoc
+       .file   "variancefuncs_svp64.c"
+       .text
+       .abiversion 2
+       .section        ".text"
+       .align 2
+       .globl vpx_get_mb_ss_svp64_real
+       .type   vpx_get_mb_ss_svp64_real, @function
+vpx_get_mb_ss_svp64_real:
+.LFB0:
+       .cfi_startproc
+       li sum, 0                               # sum = 0
+       li ctr, 8                               # 8 loop iterations
+       mtctr ctr                               # CTR = 8, counted down by bdnz
+       setvl 0,0,32,0,1,1                      # Set VL to 32 elements
+.L2:
+       # Load 32 sign-extended halfwords from (in)
+       sv.lha          *src, 0(in)
+       # equivalent to: for (i = 0; i < 32; i++) prod[i] = src[i] * src[i];
+       sv.mulld        *prod, *src, *src
+       # equivalent to: for (i = 0; i < 32; i++) sum += prod[i];
+       sv.add/mr       sum, *prod, sum
+       addi in, in, 64                         # advance past the 32 halfwords (64 bytes) just consumed
+#      rldicl in,ctr,0,32
+       bdnz .L2                                # decrement CTR, loop while it is non-zero
+       li in, 0                                # NOTE(review): redundant, the next addi overwrites in
+       addi in, sum, 0                         # return value: r3 = sum
+       blr
+       .long 0
+       .byte 0,0,0,0,0,0,0,0
+       .cfi_endproc
+.LFE0:
+       .size   vpx_get_mb_ss_svp64_real,.-vpx_get_mb_ss_svp64_real
+       .ident  "GCC: (Debian 8.3.0-6) 8.3.0"
+       .section        .note.GNU-stack,"",@progbits