2 * Copyright (c) 2001, 2002 Fabrice Bellard
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
/* Buffer-size constant. In this file imdct36 uses it to index the window
 * table at win[MDCT_BUF_SIZE/2 + k] when computing the values it stores
 * into buf. */
26 #define MDCT_BUF_SIZE 40
/* Token-pastes the suffix "_float" onto the identifier n. */
28 #define RENAME(n) n##_float
30 static inline float round_sample(float *sum
)
/* Floating-point definitions of the arithmetic helper macros.
 *
 * MACS and MLSS expand to a bare compound assignment (no outer parentheses),
 * so they are meant to be used as complete statements. */
/* rt += ra * rb (multiply-accumulate). */
37 #define MACS(rt, ra, rb) rt+=(ra)*(rb)
/* Plain product ra * rb. */
38 #define MULS(ra, rb) ((ra)*(rb))
/* Product of x and y, additionally multiplied by the factor s. */
39 #define MULH3(x, y, s) ((s)*(y)*(x))
/* rt -= ra * rb (multiply-subtract). */
40 #define MLSS(rt, ra, rb) rt-=(ra)*(rb)
/* Product of x and y; the third argument s is accepted but ignored. */
41 #define MULLx(x, y, s) ((y)*(x))
/* Both constant wrappers simply cast their argument to float. */
42 #define FIXHR(x) ((float)(x))
43 #define FIXR(x) ((float)(x))
/* "Shift right" expressed in floating point: multiplies a by 1 / 2^b. */
44 #define SHR(a,b) ((a)*(1.0f/(1<<(b))))
/* Ck = cos(k * pi / 18) / 2, i.e. half the cosine of k * 10 degrees,
 * converted to float by FIXHR. */
47 #define C1 FIXHR(0.98480775301220805936/2)
48 #define C2 FIXHR(0.93969262078590838405/2)
49 #define C3 FIXHR(0.86602540378443864676/2)
50 #define C4 FIXHR(0.76604444311897803520/2)
51 #define C5 FIXHR(0.64278760968653932632/2)
52 #define C6 FIXHR(0.5/2)
53 #define C7 FIXHR(0.34202014332566873304/2)
54 #define C8 FIXHR(0.17364817766693034885/2)
56 /* icos36[i] = 0.5 / cos(pi*(2*i+1)/36) for i = 0..8.
 * imdct36 reads this table in reverse, as icos36[8 - j]. */
57 static const float icos36
[9] = {
58 FIXR(0.50190991877167369479),
59 FIXR(0.51763809020504152469), //0
60 FIXR(0.55168895948124587824),
61 FIXR(0.61038729438072803416),
62 FIXR(0.70710678118654752439), //1
63 FIXR(0.87172339781054900991),
64 FIXR(1.18310079157624925896),
65 FIXR(1.93185165257813657349), //2
66 FIXR(5.73685662283492756461),
69 /* Pre-scaled variant of the 0.5 / cos(pi*(2*i+1)/36) table:
 * entries 0..5 hold that value divided by 2, entries 6 and 7 hold it
 * divided by 4, and the ninth initializer is commented out, so only eight
 * values are listed for the nine declared elements.
 * imdct36 reads it as icos36h[j] and multiplies by a scale of 2 via MULH3. */
70 static const float icos36h
[9] = {
71 FIXHR(0.50190991877167369479/2),
72 FIXHR(0.51763809020504152469/2), //0
73 FIXHR(0.55168895948124587824/2),
74 FIXHR(0.61038729438072803416/2),
75 FIXHR(0.70710678118654752439/2), //1
76 FIXHR(0.87172339781054900991/2),
77 FIXHR(1.18310079157624925896/4),
78 FIXHR(1.93185165257813657349/4), //2
79 // FIXHR(5.73685662283492756461),
82 /* using Lee like decomposition followed by hand coded 9 points DCT */
83 void imdct36(float *out
, float *buf
, float *in
, const float *win
)
86 float t0
, t1
, t2
, t3
, s0
, s1
, s2
, s3
;
87 float tmp
[18], *tmp1
, *in1
;
89 /* straight sv.fadds/mr for this one, should do write-hazard-free
91 sv.fadds/mrr 11.v, 10.v, 10.v
93 for (i
= 17; i
>= 1; i
--)
96 /* this can use mrr mode with predicate 0b010101010101,
97 it will do the job, something like:
98 li r30, 0b010101010101
100 sv.fadds/mrr/m=r30 12.v, 10.v, 10.v
101 which will issue adds *in reverse* order
102 fadds 24, 26, 24 - not predicated (active)
103 fadds 23, 25, 23 - predicated (masked out)
104 fadds 22, 24, 22 - not predicated (active)
105 fadds 21, 23, 21 - predicated (masked out)
107 should result in write-hazard-free overlapping adds,
108 skipping every other element because of the predication
110 for (i
= 17; i
>= 3; i
-= 2)
113 /* all of these are independent, should be possible to do with
114 setvl 2. probably possible to break these down into some
115 sort of much smaller loop but the hand-coded nature of the
116 algorithm makes it difficult to spot the pattern.
118 for (j
= 0; j
< 2; j
++) {
122 t2
= in1
[2*4] + in1
[2*8] - in1
[2*2];
124 t3
= in1
[2*0] + SHR(in1
[2*6],1);
125 t1
= in1
[2*0] - in1
[2*6];
126 tmp1
[ 6] = t1
- SHR(t2
,1);
129 t0
= MULH3(in1
[2*2] + in1
[2*4] , C2
, 2);
130 t1
= MULH3(in1
[2*4] - in1
[2*8] , -2*C8
, 1);
131 t2
= MULH3(in1
[2*2] + in1
[2*8] , -C4
, 2);
133 tmp1
[10] = t3
- t0
- t2
;
134 tmp1
[ 2] = t3
+ t0
+ t1
;
135 tmp1
[14] = t3
+ t2
- t1
;
137 tmp1
[ 4] = MULH3(in1
[2*5] + in1
[2*7] - in1
[2*1], -C3
, 2);
138 t2
= MULH3(in1
[2*1] + in1
[2*5], C1
, 2);
139 t3
= MULH3(in1
[2*5] - in1
[2*7], -2*C7
, 1);
140 t0
= MULH3(in1
[2*3], C3
, 2);
142 t1
= MULH3(in1
[2*1] + in1
[2*7], -C5
, 2);
144 tmp1
[ 0] = t2
+ t3
+ t0
;
145 tmp1
[12] = t2
+ t1
- t0
;
146 tmp1
[ 8] = t3
- t1
- t0
;
149 /* this can be done setvl 5 and with a predicate 0b01111 for the
150 items marked "if predicate"
153 for (j
= 0; j
< 5; j
++) {
154 predicate
= (j
!= 4);
156 if (predicate
) t1
= tmp
[i
+ 2]; else t1
= 0.0;
158 if (predicate
) s2
= t1
- t0
; else s2
= 0.0;
161 if (predicate
) t3
= tmp
[i
+ 3]; else t3
= 0.0;
162 s1
= MULH3(t3
+ t2
, icos36h
[ j
], 2);
163 if (predicate
) s3
= MULLx(t3
- t2
, icos36
[8 - j
], FRAC_BITS
);
167 out
[(9 + j
) * SBLIMIT
] = MULH3(t1
, win
[ 9 + j
], 1) + buf
[4*(9 + j
)];
168 out
[(8 - j
) * SBLIMIT
] = MULH3(t1
, win
[ 8 - j
], 1) + buf
[4*(8 - j
)];
169 buf
[4 * ( 9 + j
)] = MULH3(t0
, win
[MDCT_BUF_SIZE
/2 + 9 + j
], 1);
170 buf
[4 * ( 8 - j
)] = MULH3(t0
, win
[MDCT_BUF_SIZE
/2 + 8 - j
], 1);
172 if (predicate
) t0
= s2
+ s3
;
173 if (predicate
) t1
= s2
- s3
;
174 if (predicate
) out
[(9 + 8 - j
) * SBLIMIT
] = MULH3(t1
, win
[ 9 + 8 - j
], 1) + buf
[4*(9 + 8 - j
)];
175 if (predicate
) out
[ j
* SBLIMIT
] = MULH3(t1
, win
[ j
], 1) + buf
[4*( j
)];
176 if (predicate
) buf
[4 * ( 9 + 8 - j
)] = MULH3(t0
, win
[MDCT_BUF_SIZE
/2 + 9 + 8 - j
], 1);
177 if (predicate
) buf
[4 * ( j
)] = MULH3(t0
, win
[MDCT_BUF_SIZE
/2 + j
], 1);