pysvp64db: fix traversal
[openpower-isa.git] / media / audio / mp3 / mp3_0_apply_window_float_basicsv.s
# ffmpeg lgpl 2.1 or later
#
# MP3 synthesis-filter windowing, float version, written with Libre-SOC
# SVP64 ("sv.") vector instructions.  Syntax: GNU as, PowerPC64 ELFv2.
#
# Some instructions could be saved by using fused multiply-add
# (sv.fmadds, sv.fnmsubs), but the fused form is so accurate that it
# produces different results.  This demo therefore uses sv.fmuls followed
# by sv.fadds / sv.fsubs in map-reduce mode.
#
# Also note: the SVP64 FP registers (32 and up) are overwritten, not saved
# on the stack yet.  At some point 128 registers will be available, meaning
# that an EABI will be defined where there will be plenty of temporaries
# and no need to store 24 FP regs on the stack.

# ---------------------------------------------------------------------
# Integer register map
# ---------------------------------------------------------------------
# incoming arguments
.set buf, 3             # synth_buf
.set win, 4             # window (w); advanced as the routine proceeds
.set out, 6             # samples; advanced by incr after each store
.set incr, 7            # output stride; converted to bytes on entry

# temporaries
.set p, 5               # sample pointer; r5 arrives holding dither_state
                        # and is recycled as p once that has been read
.set i, 8               # loop byte offset, j * sizeof(float)
.set out2, 10           # samples2, walks backwards from samples + 31*incr

# w2 pointer.  The long-term plan is to keep it in an SVP64 extended
# integer register so that the stack never has to be touched:
#.set win2, 32
# TODO: switch to the line above once 128 regs are added to the simulator.
# Until then use r11, which is volatile under ELFv2.  (r16 was used here
# before, but r16 is callee-saved and was never saved/restored.)
.set win2, 11

# SVP64 FP vectors, 8 elements each (VL=8)
.set fv0, 32
.set fv1, 40
.set fv2, 48

# scalar FP accumulators
.set sum, 0
.set sum2, 1

        .machine libresoc
        .abiversion 2
        .section ".text"
        .align 2
        .p2align 4,,15
        .globl ff_mpadsp_apply_window_float_sv
        .type ff_mpadsp_apply_window_float_sv, @function

# ---------------------------------------------------------------------
# ff_mpadsp_apply_window_float_sv
#
# ABI:   PowerPC64 ELFv2, leaf function, no stack frame.
# In:    r3 = synth_buf    (float *)
#        r4 = window       (float *)
#        r5 = dither_state (pointer to a 32-bit word)
#        r6 = samples      (float *, output)
#        r7 = incr         (output stride, in floats)
#        NOTE(review): presumably the same prototype as ffmpeg's C
#        ff_mpadsp_apply_window_float() - confirm against the caller.
# Out:   32 floats stored at samples[k * incr], k = 0..31.
# Clobb: r0, r2, r4-r11, CTR, f0, f1, SVP64 FPRs 32-55, and the vector
#        length state written by setvl.
#
# Each output is an 8-term dot product: sv.lfs/els gathers 8 floats
# spaced 256 bytes (64 floats) apart, sv.fmuls multiplies element-wise,
# and sv.fadds/mr or sv.fsubs/mr folds the 8 products into a scalar
# accumulator (add for MACS, subtract for MLSS).
#
# NOTE(review): dither_state is only read, never written back (r5 is
# reused as p).  lfiwax loads the word as a raw integer with no
# int-to-float conversion, so "sum = *dither_state" is only meaningful
# when that word is 0.
# ---------------------------------------------------------------------
ff_mpadsp_apply_window_float_sv:
.LCF0:
        # global entry point: derive the TOC pointer (r2) from r12
        addis   2,12,.TOC.-.LCF0@ha
        addi    2,2,.TOC.-.LCF0@l
        # local entry point: callers in the same module already have a
        # valid r2 and must skip the r12-based setup above
        .localentry ff_mpadsp_apply_window_float_sv,.-ff_mpadsp_apply_window_float_sv

        # r9 = &.LC0, a word of zero bits used to clear the accumulators
        addis   9,2,.LC0@toc@ha
        addi    9,9,.LC0@toc@l

        # samples2 = samples + 31 * incr;
        # 64-bit shift: slwi would zero the upper 32 bits of the stride
        sldi    incr, incr, 2           # incr *= 4, sizeof float
        mulli   0, incr, 31
        add     out2, out, 0

        # set Vector Length
        setvl   0, 0, 8, 1, 1, 0        # setvli MVL=8, VL=8
        addi    win2, win, 124          # w2 = window + 31

        lfiwax  sum, 0, 5               # sum = *dither_state (raw bits, r5)
        addi    p, buf, 64              # p = synth_buf + 16

        # SUM8(MACS, sum, w, p)
        sv.lfs/els *fv0, 256(win)
        sv.lfs/els *fv1, 256(p)
        # fused alternative, not used (too accurate, results differ):
        #   sv.fmadds/mr sum, *fv0, *fv1, sum
        sv.fmuls *fv0, *fv0, *fv1
        sv.fadds/mr sum, *fv0, sum

        addi    p, buf, 192             # p = synth_buf + 48
        addi    win, win, 128           # w = w + 32
        # SUM8(MLSS, sum, w + 32, p)
        sv.lfs/els *fv0, 256(win)
        sv.lfs/els *fv1, 256(p)
        # fused alternative, not used (too accurate, results differ):
        #   sv.fnmsubs/mr sum, *fv0, *fv1, sum
        sv.fmuls *fv0, *fv0, *fv1
        sv.fsubs/mr sum, sum, *fv0
        addi    win, win, -128          # w = w - 32

        stfs    sum, 0(out)             # *samples = sum
        add     out, out, incr          # samples += incr
        addi    win, win, 4             # w++

        # Loop 15 times
        li      0, 15
        mtctr   0
        li      i, 4                    # loop starts at 1: (for j=1;j<16;j++)
.Lloop:
        lfiwax  sum, 0, 9               # sum  = 0 (loads the zero word at .LC0)
        lfiwax  sum2, 0, 9              # sum2 = 0

        # p = synth_buf + 16 + j
        addi    p, buf, 64
        add     p, p, i

        # SUM8P2(sum, MACS, sum2, MLSS, w, w2, p)
        sv.lfs/els *fv0, 256(p)
        sv.lfs/els *fv1, 256(win)
        sv.lfs/els *fv2, 256(win2)
        # fused alternatives, not used (too accurate, results differ):
        #   sv.fmadds/mr  sum,  *fv0, *fv1, sum
        #   sv.fnmsubs/mr sum2, *fv0, *fv2, sum2
        sv.fmuls *fv1, *fv0, *fv1
        sv.fadds/mr sum, sum, *fv1
        sv.fmuls *fv0, *fv0, *fv2
        sv.fsubs/mr sum2, sum2, *fv0

        # p = synth_buf + 48 - j
        addi    p, buf, 192
        subf    p, i, p

        # win and win2 += 32
        addi    win, win, 128
        addi    win2, win2, 128

        # SUM8P2(sum, MLSS, sum2, MLSS, w + 32, w2 + 32, p)
        sv.lfs/els *fv0, 256(p)
        sv.lfs/els *fv1, 256(win)
        sv.lfs/els *fv2, 256(win2)
        # fused alternatives, not used (too accurate, results differ):
        #   sv.fnmsubs/mr sum,  *fv0, *fv1, sum
        #   sv.fnmsubs/mr sum2, *fv0, *fv2, sum2
        sv.fmuls *fv1, *fv0, *fv1
        sv.fsubs/mr sum, sum, *fv1
        sv.fmuls *fv0, *fv0, *fv2
        sv.fsubs/mr sum2, sum2, *fv0

        # win and win2 -= 32
        addi    win, win, -128
        addi    win2, win2, -128

        stfs    sum, 0(out)             # *samples = sum
        add     out, out, incr          # samples += incr
        stfs    sum2, 0(out2)           # *samples2 = sum2
        subf    out2, incr, out2        # samples2 -= incr

        addi    i, i, 4                 # for-loop j=1..15
        addi    win, win, 4             # w++
        addi    win2, win2, -4          # w2--
        bdnz    .Lloop

        # final output: out now points at samples + 16 * incr
        addi    p, buf, 128             # p = synth_buf + 32
        addi    win, win, 128           # w += 32
        lfiwax  sum, 0, 9               # sum = 0
        # SUM8(MLSS, sum, w + 32, p)
        sv.lfs/els *fv0, 256(win)
        sv.lfs/els *fv1, 256(p)
        # fused alternative, not used (too accurate, results differ):
        #   sv.fnmsubs/mr sum, *fv0, *fv1, sum
        sv.fmuls *fv0, *fv0, *fv1
        sv.fsubs/mr sum, sum, *fv0

        stfs    sum, 0(out)             # *samples = sum

        blr

        .size ff_mpadsp_apply_window_float_sv,.-ff_mpadsp_apply_window_float_sv

        .section .rodata
        .align 2
# 32-bit word of zero bits; loaded with lfiwax to clear sum / sum2
.LC0:
        .long 0