pysvp64db: fix traversal
[openpower-isa.git] / media / audio / mp3 / imdct36_standalone.c
1 /*
2 * Copyright (c) 2001, 2002 Fabrice Bellard
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 #include <stdint.h>
22 #include <stddef.h>
23
24 #define USE_FLOATS 1
25 #define SBLIMIT 32
26 #define MDCT_BUF_SIZE 40
27
28 #define RENAME(n) n##_float
29
30 static inline float round_sample(float *sum)
31 {
32 float sum1=*sum;
33 *sum = 0;
34 return sum1;
35 }
36
37 #define MACS(rt, ra, rb) rt+=(ra)*(rb)
38 #define MULS(ra, rb) ((ra)*(rb))
39 #define MULH3(x, y, s) ((s)*(y)*(x))
40 #define MLSS(rt, ra, rb) rt-=(ra)*(rb)
41 #define MULLx(x, y, s) ((y)*(x))
42 #define FIXHR(x) ((float)(x))
43 #define FIXR(x) ((float)(x))
44 #define SHR(a,b) ((a)*(1.0f/(1<<(b))))
45
46 /* cos(pi*i/18) */
47 #define C1 FIXHR(0.98480775301220805936/2)
48 #define C2 FIXHR(0.93969262078590838405/2)
49 #define C3 FIXHR(0.86602540378443864676/2)
50 #define C4 FIXHR(0.76604444311897803520/2)
51 #define C5 FIXHR(0.64278760968653932632/2)
52 #define C6 FIXHR(0.5/2)
53 #define C7 FIXHR(0.34202014332566873304/2)
54 #define C8 FIXHR(0.17364817766693034885/2)
55
56 /* 0.5 / cos(pi*(2*i+1)/36) */
57 static const float icos36[9] = {
58 FIXR(0.50190991877167369479),
59 FIXR(0.51763809020504152469), //0
60 FIXR(0.55168895948124587824),
61 FIXR(0.61038729438072803416),
62 FIXR(0.70710678118654752439), //1
63 FIXR(0.87172339781054900991),
64 FIXR(1.18310079157624925896),
65 FIXR(1.93185165257813657349), //2
66 FIXR(5.73685662283492756461),
67 };
68
69 /* 0.5 / cos(pi*(2*i+1)/36) */
70 static const float icos36h[9] = {
71 FIXHR(0.50190991877167369479/2),
72 FIXHR(0.51763809020504152469/2), //0
73 FIXHR(0.55168895948124587824/2),
74 FIXHR(0.61038729438072803416/2),
75 FIXHR(0.70710678118654752439/2), //1
76 FIXHR(0.87172339781054900991/2),
77 FIXHR(1.18310079157624925896/4),
78 FIXHR(1.93185165257813657349/4), //2
79 // FIXHR(5.73685662283492756461),
80 };
81
82 /* using Lee like decomposition followed by hand coded 9 points DCT */
83 void imdct36(float *out, float *buf, float *in, const float *win)
84 {
85 int i, j, predicate;
86 float t0, t1, t2, t3, s0, s1, s2, s3;
87 float tmp[18], *tmp1, *in1;
88
89 /* straight sv.fadds/mr for this one, should do write-hazard-free
90 overlapping adds:
91 sv.fadds/mrr 11.v, 10.v, 10.v
92 */
93 for (i = 17; i >= 1; i--)
94 in[i] += in[i-1];
95
96 /* this can use mrr mode with predicate 0b010101010101,
97 it will do the job, something like:
98 li r30, 0b010101010101
99 setvl 16
100 sv.fadds/mrr/m=r30 12.v, 10.v, 10.v
101 which will issue adds *in reverse* order
102 fadds 24, 26, 24 - not predicated (active)
103 fadds 23, 25, 23 - predicated (masked out)
104 fadds 22, 24, 22 - not predicated (active)
105 fadds 21, 23, 21 - predicated (masked out)
106 ...
107 should result in write-hazard-free overlapping adds,
108 skipping every other element because of the predication
109 */
110 for (i = 17; i >= 3; i -= 2)
111 in[i] += in[i-2];
112
113 /* all of these are independent, should be possible to do with
114 setvl 2. probably possible to break these down into some
115 sort of much smaller loop but the hand-coded nature of the
116 algorithm makes it difficult to spot the pattern.
117 */
118 for (j = 0; j < 2; j++) {
119 tmp1 = tmp + j;
120 in1 = in + j;
121
122 t2 = in1[2*4] + in1[2*8] - in1[2*2];
123
124 t3 = in1[2*0] + SHR(in1[2*6],1);
125 t1 = in1[2*0] - in1[2*6];
126 tmp1[ 6] = t1 - SHR(t2,1);
127 tmp1[16] = t1 + t2;
128
129 t0 = MULH3(in1[2*2] + in1[2*4] , C2, 2);
130 t1 = MULH3(in1[2*4] - in1[2*8] , -2*C8, 1);
131 t2 = MULH3(in1[2*2] + in1[2*8] , -C4, 2);
132
133 tmp1[10] = t3 - t0 - t2;
134 tmp1[ 2] = t3 + t0 + t1;
135 tmp1[14] = t3 + t2 - t1;
136
137 tmp1[ 4] = MULH3(in1[2*5] + in1[2*7] - in1[2*1], -C3, 2);
138 t2 = MULH3(in1[2*1] + in1[2*5], C1, 2);
139 t3 = MULH3(in1[2*5] - in1[2*7], -2*C7, 1);
140 t0 = MULH3(in1[2*3], C3, 2);
141
142 t1 = MULH3(in1[2*1] + in1[2*7], -C5, 2);
143
144 tmp1[ 0] = t2 + t3 + t0;
145 tmp1[12] = t2 + t1 - t0;
146 tmp1[ 8] = t3 - t1 - t0;
147 }
148
149 /* this can be done setvl 5 and with a predicate 0b01111 for the
150 items marked "if predicate"
151 */
152 i = 0;
153 for (j = 0; j < 5; j++) {
154 predicate = (j != 4);
155 t0 = tmp[i];
156 if (predicate) t1 = tmp[i + 2]; else t1 = 0.0;
157 s0 = t1 + t0;
158 if (predicate) s2 = t1 - t0; else s2 = 0.0;
159
160 t2 = tmp[i + 1];
161 if (predicate) t3 = tmp[i + 3]; else t3 = 0.0;
162 s1 = MULH3(t3 + t2, icos36h[ j], 2);
163 if (predicate) s3 = MULLx(t3 - t2, icos36 [8 - j], FRAC_BITS);
164
165 t0 = s0 + s1;
166 t1 = s0 - s1;
167 out[(9 + j) * SBLIMIT] = MULH3(t1, win[ 9 + j], 1) + buf[4*(9 + j)];
168 out[(8 - j) * SBLIMIT] = MULH3(t1, win[ 8 - j], 1) + buf[4*(8 - j)];
169 buf[4 * ( 9 + j )] = MULH3(t0, win[MDCT_BUF_SIZE/2 + 9 + j], 1);
170 buf[4 * ( 8 - j )] = MULH3(t0, win[MDCT_BUF_SIZE/2 + 8 - j], 1);
171
172 if (predicate) t0 = s2 + s3;
173 if (predicate) t1 = s2 - s3;
174 if (predicate) out[(9 + 8 - j) * SBLIMIT] = MULH3(t1, win[ 9 + 8 - j], 1) + buf[4*(9 + 8 - j)];
175 if (predicate) out[ j * SBLIMIT] = MULH3(t1, win[ j], 1) + buf[4*( j)];
176 if (predicate) buf[4 * ( 9 + 8 - j )] = MULH3(t0, win[MDCT_BUF_SIZE/2 + 9 + 8 - j], 1);
177 if (predicate) buf[4 * ( j )] = MULH3(t0, win[MDCT_BUF_SIZE/2 + j], 1);
178 i += 4;
179 }
180 }