From 22d4c56d566c41e3a8ad4c69cc57240509960a75 Mon Sep 17 00:00:00 2001 From: Dmitry Selyutin Date: Wed, 12 Apr 2023 21:16:18 +0300 Subject: [PATCH] media: migrate to binutils --- media/Makefile | 5 +- .../mp3/mp3_0_apply_window_float_basicsv.s | 2 +- media/audio/mp3/mp3_1_imdct36_float_basicsv.s | 22 +++---- media/video/libvpx/vp8_dct4x4_real.s | 59 ++++++++++--------- 4 files changed, 45 insertions(+), 43 deletions(-) diff --git a/media/Makefile b/media/Makefile index d2694e26..6c33dac1 100644 --- a/media/Makefile +++ b/media/Makefile @@ -5,7 +5,8 @@ AS = $(CROSS)as LD = $(CROSS)ld OBJCOPY = $(CROSS)objcopy -AFLAGS ?= -mpwr9 +PYSVP64TRANS ?= cp +AFLAGS ?= -mlibresoc EXPECTED_VER = 1 VER = $(shell cat data/VERSION) @@ -31,7 +32,7 @@ wget: tar -C data -xvf media-test-data.txz %.bin: %.s - pysvp64asm $< $<.sv + $(PYSVP64TRANS) $< $<.sv $(AS) $(AFLAGS) -c $<.sv -le -o $<.o $(LD) $<.o -EL -o $<.elf -T memmap $(OBJCOPY) $<.elf -I elf64-little -O binary $@ diff --git a/media/audio/mp3/mp3_0_apply_window_float_basicsv.s b/media/audio/mp3/mp3_0_apply_window_float_basicsv.s index 5999bef1..0ba32518 100644 --- a/media/audio/mp3/mp3_0_apply_window_float_basicsv.s +++ b/media/audio/mp3/mp3_0_apply_window_float_basicsv.s @@ -32,7 +32,7 @@ .set sum, 0 .set sum2, 1 - .machine power9 + .machine libresoc .abiversion 2 .section ".text" .align 2 diff --git a/media/audio/mp3/mp3_1_imdct36_float_basicsv.s b/media/audio/mp3/mp3_1_imdct36_float_basicsv.s index 8fff01d0..65af0b60 100644 --- a/media/audio/mp3/mp3_1_imdct36_float_basicsv.s +++ b/media/audio/mp3/mp3_1_imdct36_float_basicsv.s @@ -18,7 +18,7 @@ .set vin, 8 .set vin1, 9 .set vin2, 11 -.set pred, 30 +.set pred, %r30 # floats @@ -105,22 +105,22 @@ imdct36: # @imdct36 # Similarly, the values of 'in' array are already in registers 8-26 setvl 0,0,2,0,1,1 # t1 - sv.fsubs 32.v, 8.v, 20.v + sv.fsubs *32, *8, *20 # t2 - sv.fadds 35.v, 16.v, 24.v - sv.fsubs 35.v, 35.v, 12.v + sv.fadds *35, *16, *24 + sv.fsubs *35, *35, *12 # t3, SHR(a,b) 
= a * 1.0f/(1 << (1)) = a / 2 essentially fdiv a, a, 2.0 - sv.fdivs 38.v, 20.v, 29 - sv.fadds 38.v, 38.v, 8.v - # t4, essentially fdiv 41.v, 35.v, 29 - sv.fdivs 41.v, 35.v, 29 - sv.fsubs 41.v, 32.v, 41.v + sv.fdivs *38, *20, 29 + sv.fadds *38, *38, *8 + # t4, essentially fdiv *41, *35, 29 + sv.fdivs *41, *35, 29 + sv.fsubs *41, *32, *41 # t5 - sv.fadds 44.v, 32.v, 35.v + sv.fadds *44, *32, *35 # Use SETVL again as we want to store 18 floats to (out) setvl 0,0,18,0,1,1 - sv.stfs 32.v, 0(3) + sv.stfs *32, 0(3) blr .long 0 .quad 0 diff --git a/media/video/libvpx/vp8_dct4x4_real.s b/media/video/libvpx/vp8_dct4x4_real.s index e65642fa..1a3f1fcc 100644 --- a/media/video/libvpx/vp8_dct4x4_real.s +++ b/media/video/libvpx/vp8_dct4x4_real.s @@ -29,68 +29,68 @@ vp8_short_fdct4x4_svp64_real: sv.lha *ip, 0(in) # Load 4 ints from (in) ori pred, 0, 0b0001000100010001 - sv.add/dm=r10 *t, *ip, *ip+3 # a1 = ip[0] + ip[3] - sv.add/dm=r10 *t+1, *ip+1, *ip+2 # b1 = ip[1] + ip[2] - sv.subf/dm=r10 *t+2, *ip+2, *ip+1 # c1 = ip[1] - ip[2] - sv.subf/dm=r10 *t+3, *ip+3, *ip # d1 = ip[0] - ip[3] + sv.add/dm=%r10 *t, *ip, *ip+3 # a1 = ip[0] + ip[3] + sv.add/dm=%r10 *t+1, *ip+1, *ip+2 # b1 = ip[1] + ip[2] + sv.subf/dm=%r10 *t+2, *ip+2, *ip+1 # c1 = ip[1] - ip[2] + sv.subf/dm=%r10 *t+3, *ip+3, *ip # d1 = ip[0] - ip[3] sv.mulli *t, *t, 8 # a1 *= 8, b1 *= 8, c1 *= 8, d1 *= 8 - sv.add/dm=r10 *op, *t, *t+1 # op[0] = a1 + b1; - sv.subf/dm=r10 *op+2, *t+1, *t # op[2] = a1 - b1; + sv.add/dm=%r10 *op, *t, *t+1 # op[0] = a1 + b1; + sv.subf/dm=%r10 *op+2, *t+1, *t # op[2] = a1 - b1; # Calculate c1 * 2217, c1 *5352, d1 * 2217 and d1 * 5352 ori pred, 0, 0b1100110011001100 - sv.mulli/m=r10 *t2, *t, 2217 # t2 has c1 * 2217, d1 * 2217 - sv.mulli/m=r10 *t3, *t, 5352 # t3 has c1 * 5352, d1 * 5352 + sv.mulli/m=%r10 *t2, *t, 2217 # t2 has c1 * 2217, d1 * 2217 + sv.mulli/m=%r10 *t3, *t, 5352 # t3 has c1 * 5352, d1 * 5352 ori pred, 0, 0b0010001000100010 # op[1] = (c1 * 2217 + d1 * 5352 + 14500) - sv.add/m=r10 *op, 
*t2+1, *t3+2 # c1 * 2217 + d1 * 5352 - sv.addi/m=r10 *op, *op, 14500 # + 14500 + sv.add/m=%r10 *op, *t2+1, *t3+2 # c1 * 2217 + d1 * 5352 + sv.addi/m=%r10 *op, *op, 14500 # + 14500 ori pred, 0, 0b0100010001000100 # op[3] = (d1 * 2217 - c1 * 5352 + 7500) - sv.subf/m=r10 *op+1, *t3, *t2+1 # - c1 * 5352 + d1 * 2127 - sv.addi/m=r10 *op+1, *op+1, 7500 # + 7500 + sv.subf/m=%r10 *op+1, *t3, *t2+1 # - c1 * 5352 + d1 * 2217 + sv.addi/m=%r10 *op+1, *op+1, 7500 # + 7500 ori pred, 0, 0b1010101010101010 - sv.rldicl/m=r10 *op, *op, 52, 12 # op[1] >>= 12, op[3] >>= 12 + sv.rldicl/m=%r10 *op, *op, 52, 12 # op[1] >>= 12, op[3] >>= 12 # column-wise DCT ori pred, 0, 0b0000000000001111 - sv.add/m=r10 *t, *op, *op+12 # a1 = ip[0] + ip[12] - sv.add/m=r10 *t+4, *op+4, *op+8 # b1 = ip[4] + ip[8] - sv.subf/m=r10 *t+8, *op+8, *op+4 # c1 = ip[4] - ip[8] - sv.subf/m=r10 *t+12, *op+12, *op # d1 = ip[0] - ip[12] + sv.add/m=%r10 *t, *op, *op+12 # a1 = ip[0] + ip[12] + sv.add/m=%r10 *t+4, *op+4, *op+8 # b1 = ip[4] + ip[8] + sv.subf/m=%r10 *t+8, *op+8, *op+4 # c1 = ip[4] - ip[8] + sv.subf/m=%r10 *t+12, *op+12, *op # d1 = ip[0] - ip[12] # op[0] = (a1 + b1 + 7) >> 4 - sv.add/m=r10 *op, *t, *t+4 # op[0] = a1 + b1 - sv.addi/m=r10 *op, *op, 7 # op[0] += 7 + sv.add/m=%r10 *op, *t, *t+4 # op[0] = a1 + b1 + sv.addi/m=%r10 *op, *op, 7 # op[0] += 7 # op[8] = (a1 - b1 + 7) >> 4 - sv.subf/m=r10 *op+8, *t+4, *t # op[8] = a1 - b1 - sv.addi/m=r10 *op+8, *op+8, 7 # op[8] += 7 + sv.subf/m=%r10 *op+8, *t+4, *t # op[8] = a1 - b1 + sv.addi/m=%r10 *op+8, *op+8, 7 # op[8] += 7 ori pred, 0, 0b0000111100001111 - sv.rldicl/m=r10 *op, *op, 60, 4 # op[0] >>= 4, op[8] >>= 4 + sv.rldicl/m=%r10 *op, *op, 60, 4 # op[0] >>= 4, op[8] >>= 4 # Calculate c1 * 2217, c1 *5352, d1 * 2217 and d1 * 5352 ori pred, 0, 0b1111111100000000 - sv.mulli/m=r10 *t2, *t, 2217 # t2 has c1 * 2217, d1 * 2217 - sv.mulli/m=r10 *t3, *t, 5352 # t3 has c1 * 5352, d1 * 5352 + sv.mulli/m=%r10 *t2, *t, 2217 # t2 has c1 * 2217, d1 * 2217 + sv.mulli/m=%r10 *t3, 
*t, 5352 # t3 has c1 * 5352, d1 * 5352 # op[4] = ((c1 * 2217 + d1 * 5352 + 12000) ori pred, 0, 0b0000000011110000 - sv.add/m=r10 *op, *t2+4, *t3+8 # c1 * 2217 + d1 * 5352 - sv.addi/m=r10 *op, *op, 12000 # + 12000 + sv.add/m=%r10 *op, *t2+4, *t3+8 # c1 * 2217 + d1 * 5352 + sv.addi/m=%r10 *op, *op, 12000 # + 12000 # op[12] = (d1 * 2217 - c1 * 5352 + 51000) ori pred, 0, 0b1111000000000000 - sv.subf/m=r10 *op, *t3-4, *t2 # - c1 * 5352 + d1 * 2127 - sv.add/m=r10 *op, *op, c_51000 # + 51000 + sv.subf/m=%r10 *op, *t3-4, *t2 # - c1 * 5352 + d1 * 2217 + sv.add/m=%r10 *op, *op, c_51000 # + 51000 ori pred, 0, 0b1111000011110000 - sv.rldicl/m=r10 *op, *op, 48, 16 # op[4] >>= 16, op[12] >= 16 + sv.rldicl/m=%r10 *op, *op, 48, 16 # op[4] >>= 16, op[12] >>= 16 # op[4] += (d1 != 0) #ori pred, 0, 0b0000000011110000 @@ -109,3 +109,4 @@ vp8_short_fdct4x4_svp64_real: .size vp8_short_fdct4x4_svp64_real,.-vp8_short_fdct4x4_svp64_real .ident "GCC: (Debian 8.3.0-6) 8.3.0" .section .note.GNU-stack,"",@progbits + -- 2.30.2